mirror of
				https://github.com/catchorg/Catch2.git
				synced 2025-11-03 21:49:32 +01:00 
			
		
		
		
	Cleanup the shard integration test script
This commit is contained in:
		@@ -5,86 +5,154 @@ This test script verifies that sharding tests does change which tests are run.
 | 
			
		||||
This is done by running the binary multiple times, once to list all the tests,
 | 
			
		||||
once per shard to list the tests for that shard, and once again per shard to
 | 
			
		||||
execute the tests. The sharded lists are compared to the full list to ensure
 | 
			
		||||
none are skipped, duplicated, and that the order remains the same. This process
 | 
			
		||||
is repeated for multiple command line argument combinations to ensure sharding
 | 
			
		||||
works with different filters and test orderings.
 | 
			
		||||
none are skipped, duplicated, and that the order remains the same.
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import random
 | 
			
		||||
import subprocess
 | 
			
		||||
import sys
 | 
			
		||||
import xml.etree.ElementTree as ET
 | 
			
		||||
 | 
			
		||||
from collections import namedtuple
 | 
			
		||||
 | 
			
		||||
from typing import List, Dict
 | 
			
		||||
 | 
			
		||||
# One random seed is drawn per script run and passed to every invocation of
# the test binary (see the '--rng-seed' argument in make_base_commandline),
# so that all listings and executions use the same randomized test order.
seed = random.randint(0, 2 ** 32 - 1)

# Number of shards the listing/execution checks split the test run into.
number_of_shards = 5
 | 
			
		||||
 | 
			
		||||
def make_base_commandline(self_test_exe):
    """Return the argument list shared by every invocation of the test binary.

    The command line selects the XML reporter, random test ordering with the
    module-level ``seed``, and a fixed test filter. No shard arguments are
    included here: callers append ``--shard-count`` / ``--shard-index``
    themselves, so the same base can be used for both unsharded and sharded
    runs.

    :param self_test_exe: path to the test binary to run
    :return: a new list of command line arguments (safe for callers to extend)
    """
    return [
        self_test_exe,
        '--reporter', 'xml',
        '--order', 'rand',
        '--rng-seed', str(seed),
        "[generators]~[benchmarks]~[.]"
    ]
 | 
			
		||||
 | 
			
		||||
def list_tests(self_test_exe: str, extra_args: List[str] = None):
    """Ask the test binary to list its tests and return their names in order.

    Runs the binary with the base command line plus ``--list-tests`` (and any
    ``extra_args``), then parses the XML written to stdout.

    :param self_test_exe: path to the test binary to run
    :param extra_args: optional extra command line arguments, e.g. shard
                       selection; appended after the base arguments
    :return: list of test case names, in the order they were reported
    :raises RuntimeError: if the binary exits with a non-zero code, writes
                          anything to stderr, or lists fewer than 2 tests
    """
    cmd = make_base_commandline(self_test_exe) + ['--list-tests']
    if extra_args:
        cmd.extend(extra_args)

    try:
        ret = subprocess.run(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             timeout=10,
                             check=True,
                             universal_newlines=True)
    except subprocess.CalledProcessError as ex:
        # Without a completed process there is nothing to parse, so fail
        # here instead of falling through to an unbound `ret`.
        raise RuntimeError(
            'Could not list tests:\n{}'.format(ex.stderr)) from ex

    if ret.stderr:
        raise RuntimeError("Unexpected error output:\n" + ret.stderr)

    root = ET.fromstring(ret.stdout)
    result = [elem.text for elem in root.findall('./TestCase/Name')]

    # A near-empty listing means the filter or the binary is broken, and
    # would make the later shard comparisons meaningless.
    if len(result) < 2:
        raise RuntimeError("Unexpectedly few tests listed (got {})".format(
            len(result)))

    return result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def execute_tests(self_test_exe: str, extra_args: List[str] = None):
    """Run the test binary and return the names of the executed tests in order.

    Runs the binary with the base command line (and any ``extra_args``), then
    parses the XML report written to stdout.

    :param self_test_exe: path to the test binary to run
    :param extra_args: optional extra command line arguments, e.g. shard
                       selection; appended after the base arguments
    :return: list of executed test case names, in the order they were reported
    :raises RuntimeError: if the binary exits with a non-zero code, writes
                          anything to stderr, or reports fewer than 2 tests
    """
    cmd = make_base_commandline(self_test_exe)
    if extra_args:
        cmd.extend(extra_args)

    try:
        ret = subprocess.run(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             timeout=10,
                             check=True,
                             universal_newlines=True)
    except subprocess.CalledProcessError as ex:
        # Without a completed process there is nothing to parse, so fail
        # here instead of falling through to an unbound `ret`.
        raise RuntimeError(
            'Could not execute tests:\n{}'.format(ex.stderr)) from ex

    if ret.stderr:
        raise RuntimeError("Unexpected error output:\n" + ret.stderr)

    root = ET.fromstring(ret.stdout)
    result = [elem.attrib["name"] for elem in root.findall('./TestCase')]

    # A near-empty run means the filter or the binary is broken, and would
    # make the later shard comparisons meaningless.
    if len(result) < 2:
        raise RuntimeError("Unexpectedly few tests listed (got {})".format(
            len(result)))

    return result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def check_listed_and_executed_tests_match(listed_tests, executed_tests):
 | 
			
		||||
        listed_names = set(listed_tests)
 | 
			
		||||
        executed_names = set(executed_tests)
 | 
			
		||||
def test_sharded_listing(self_test_exe: str) -> Dict[int, List[str]]:
    """
    Asks the test binary for list of all tests, and also for lists of
    tests from shards.

    The combination of shards is then checked whether it corresponds to
    the full list of all tests.

    Returns the dictionary of shard-index => listed tests for later use.
    The full, unsharded listing is stored under the key -1.
    """
    all_tests = list_tests(self_test_exe)
    big_shard_tests = list_tests(self_test_exe, ['--shard-count', '1', '--shard-index', '0'])

    # A single shard covering everything must be the same as no sharding.
    assert all_tests == big_shard_tests, (
        "No-sharding test list does not match the listing of big shard:\nNo shard:\n{}\n\nWith shard:\n{}\n".format(
            '\n'.join(all_tests),
            '\n'.join(big_shard_tests)
        )
    )

    shard_listings = dict()
    for shard_idx in range(number_of_shards):
        shard_listings[shard_idx] = list_tests(self_test_exe, ['--shard-count', str(number_of_shards), '--shard-index', str(shard_idx)])

    shard_sizes = [len(shard_listings[shard_idx]) for shard_idx in range(number_of_shards)]
    assert len(all_tests) == sum(shard_sizes), (
        "Shard sizes {} do not add up to the number of all tests ({})".format(
            shard_sizes, len(all_tests))
    )

    # Check that the shards have roughly the right sizes (e.g. we don't
    # have all tests in single shard and the others are empty)
    differences = [abs(x1 - x2) for x1, x2 in zip(shard_sizes, shard_sizes[1:])]
    assert all(diff <= 1 for diff in differences), "A shard has weird size: {}".format(shard_sizes)

    # Concatenate the shards in explicit shard-index order, so the comparison
    # also verifies that sharding preserves the overall test order.
    combined_shards = [
        name
        for shard_idx in range(number_of_shards)
        for name in shard_listings[shard_idx]
    ]
    assert all_tests == combined_shards, (
        "All tests and combined shards disagree.\nNo shard:\n{}\n\nCombined:\n{}\n\n".format(
            '\n'.join(all_tests),
            '\n'.join(combined_shards)
        )
    )
    shard_listings[-1] = all_tests

    return shard_listings
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_sharding(self_test_exe):
 | 
			
		||||
    listed_tests = list_tests(self_test_exe)
 | 
			
		||||
    executed_tests = execute_tests(self_test_exe)
 | 
			
		||||
def test_sharded_execution(self_test_exe: str, listings: Dict[int, List[str]]):
    """
    Runs the test binary and checks that the executed tests match the
    previously listed tests.

    Also does this for various shard indices, and that the combination
    of all shards matches the full run/listing.

    `listings` maps shard-index => listed tests, with the full unsharded
    listing stored under the key -1.
    """
    all_tests = execute_tests(self_test_exe)
    big_shard_tests = execute_tests(self_test_exe, ['--shard-count', '1', '--shard-index', '0'])

    # A single shard covering everything must be the same as no sharding.
    assert all_tests == big_shard_tests, (
        "No-sharding execution does not match the execution of big shard:\nNo shard:\n{}\n\nWith shard:\n{}\n".format(
            '\n'.join(all_tests),
            '\n'.join(big_shard_tests)
        )
    )

    assert listings[-1] == all_tests, (
        "Executed tests do not match the listed tests:\nExecuted:\n{}\n\nListed:\n{}\n".format(
            '\n'.join(all_tests),
            '\n'.join(listings[-1])
        )
    )

    for shard_idx in range(number_of_shards):
        executed = execute_tests(self_test_exe, ['--shard-count', str(number_of_shards), '--shard-index', str(shard_idx)])
        assert listings[shard_idx] == executed, (
            "Shard {} executed tests do not match its listing:\nExecuted:\n{}\n\nListed:\n{}\n".format(
                shard_idx,
                '\n'.join(executed),
                '\n'.join(listings[shard_idx])
            )
        )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def main():
    """Run the sharding checks against the test binary given on the command line.

    Expects exactly one command line argument: the path to the test binary.
    The listings collected by the listing check are passed on to the
    execution check, so both are compared against the same data.
    """
    # Single-element unpacking: raises ValueError unless exactly one
    # argument was given.
    self_test_exe, = sys.argv[1:]

    listings = test_sharded_listing(self_test_exe)
    test_sharded_execution(self_test_exe, listings)


if __name__ == '__main__':
    sys.exit(main())
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user