mirror of
				https://github.com/catchorg/Catch2.git
				synced 2025-11-04 05:59:32 +01:00 
			
		
		
		
	Cleanup the shard integration test script
This commit is contained in:
		@@ -5,86 +5,154 @@ This test script verifies that sharding tests does change which tests are run.
 | 
				
			|||||||
This is done by running the binary multiple times, once to list all the tests,
 | 
					This is done by running the binary multiple times, once to list all the tests,
 | 
				
			||||||
once per shard to list the tests for that shard, and once again per shard to
 | 
					once per shard to list the tests for that shard, and once again per shard to
 | 
				
			||||||
execute the tests. The sharded lists are compared to the full list to ensure
 | 
					execute the tests. The sharded lists are compared to the full list to ensure
 | 
				
			||||||
none are skipped, duplicated, and that the order remains the same. This process
 | 
					none are skipped, duplicated, and that the order remains the same.
 | 
				
			||||||
is repeated for multiple command line argument combinations to ensure sharding
 | 
					 | 
				
			||||||
works with different filters and test orderings.
 | 
					 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import random
 | 
				
			||||||
import subprocess
 | 
					import subprocess
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
import xml.etree.ElementTree as ET
 | 
					import xml.etree.ElementTree as ET
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from collections import namedtuple
 | 
					from collections import namedtuple
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from typing import List, Dict
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					seed = random.randint(0, 2 ** 32 - 1)
 | 
				
			||||||
 | 
					number_of_shards = 5
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def make_base_commandline(self_test_exe: str) -> List[str]:
    """
    Builds the command line shared by every invocation of the test binary.

    The XML reporter is requested because callers parse the output with
    ElementTree. Random ordering is requested together with the module-level
    `seed`, so the same seed is handed to every invocation made by this
    script. The final argument is the test spec passed through to the
    binary unchanged.

    Sharding options are intentionally not part of the base command line;
    callers append them as extra arguments when needed.
    """
    return [
        self_test_exe,
        '--reporter', 'xml',
        '--order', 'rand',
        '--rng-seed', str(seed),
        "[generators]~[benchmarks]~[.]",
    ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def list_tests(self_test_exe: str, extra_args: List[str] = None):
    """
    Runs the test binary with `--list-tests` and returns the listed names.

    `extra_args`, when given, are appended to the base command line (this
    is how callers pass `--shard-count` / `--shard-index`).

    Returns the text of every `./TestCase/Name` element of the XML output,
    in document order.

    Raises:
        subprocess.CalledProcessError: the binary exited with non-zero
            status (its stderr is printed first).
        subprocess.TimeoutExpired: the binary ran longer than 10 seconds.
        RuntimeError: the binary wrote anything to stderr, or fewer than
            two tests were listed.
    """
    cmd = make_base_commandline(self_test_exe) + ['--list-tests']
    if extra_args:
        cmd.extend(extra_args)

    try:
        ret = subprocess.run(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             timeout=10,
                             check=True,
                             universal_newlines=True)
    except subprocess.CalledProcessError as ex:
        print('Could not list tests:\n{}'.format(ex.stderr))
        # Re-raise: falling through would touch the unbound `ret` and hide
        # the real failure behind an UnboundLocalError.
        raise

    if ret.stderr:
        raise RuntimeError("Unexpected error output:\n" + ret.stderr)

    root = ET.fromstring(ret.stdout)
    result = [elem.text for elem in root.findall('./TestCase/Name')]

    # A (near-)empty listing means the filter or the shard is broken, and
    # every later comparison would pass vacuously.
    if len(result) < 2:
        raise RuntimeError("Unexpectedly few tests listed (got {})".format(
            len(result)))

    return result
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def execute_tests(self_test_exe: str, extra_args: List[str] = None):
    """
    Runs the tests in the test binary and returns the names of those run.

    `extra_args`, when given, are appended to the base command line (this
    is how callers pass `--shard-count` / `--shard-index`).

    Returns the `name` attribute of every `./TestCase` element of the XML
    report, in document order.

    Raises:
        subprocess.CalledProcessError: the binary exited with non-zero
            status (its stderr is printed first).
        subprocess.TimeoutExpired: the binary ran longer than 10 seconds.
        RuntimeError: the binary wrote anything to stderr, or fewer than
            two tests were executed.
    """
    cmd = make_base_commandline(self_test_exe)
    if extra_args:
        cmd.extend(extra_args)

    try:
        ret = subprocess.run(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             timeout=10,
                             check=True,
                             universal_newlines=True)
    except subprocess.CalledProcessError as ex:
        print('Could not execute tests:\n{}'.format(ex.stderr))
        # Re-raise: falling through would touch the unbound `ret` and hide
        # the real failure behind an UnboundLocalError.
        raise

    if ret.stderr:
        # Must read from `ret`; there is no `process` object in this function.
        raise RuntimeError("Unexpected error output:\n" + ret.stderr)

    root = ET.fromstring(ret.stdout)
    result = [elem.attrib["name"] for elem in root.findall('./TestCase')]

    # A (near-)empty run means the filter or the shard is broken, and
    # every later comparison would pass vacuously.
    if len(result) < 2:
        raise RuntimeError("Unexpectedly few tests executed (got {})".format(
            len(result)))

    return result
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def test_sharded_listing(self_test_exe: str) -> Dict[int, List[str]]:
    """
    Asks the test binary for the list of all tests, and also for the list
    of tests in each shard.

    Checks that:
      * listing without sharding equals listing with a single shard,
      * the shard sizes add up to the full listing and are balanced,
      * concatenating the shards in index order gives the full listing.

    Returns the dictionary of shard-index => listed tests for later use.
    The full, unsharded listing is stored under the key -1.
    """
    all_tests = list_tests(self_test_exe)
    big_shard_tests = list_tests(self_test_exe, ['--shard-count', '1', '--shard-index', '0'])

    assert all_tests == big_shard_tests, (
        "No-sharding test list does not match the listing of big shard:\nNo shard:\n{}\n\nWith shard:\n{}\n".format(
            '\n'.join(all_tests),
            '\n'.join(big_shard_tests)
        )
    )

    shard_listings = {}
    for shard_idx in range(number_of_shards):
        shard_listings[shard_idx] = list_tests(self_test_exe, ['--shard-count', str(number_of_shards), '--shard-index', str(shard_idx)])

    # Index explicitly instead of relying on dict iteration order.
    shard_sizes = [len(shard_listings[shard_idx]) for shard_idx in range(number_of_shards)]
    assert len(all_tests) == sum(shard_sizes), (
        "Shards list {} tests in total, but the unsharded listing has {}. Shard sizes: {}".format(
            sum(shard_sizes), len(all_tests), shard_sizes
        )
    )

    # Check that the shards have roughly the right sizes (e.g. we don't
    # have all tests in single shard and the others are empty).
    # Comparing only neighbouring shards would accept a steady drift such
    # as 1, 2, 3, 4, 5, so compare the extremes instead.
    assert max(shard_sizes) - min(shard_sizes) <= 1, "A shard has weird size: {}".format(shard_sizes)

    combined_shards = [name
                       for shard_idx in range(number_of_shards)
                       for name in shard_listings[shard_idx]]
    assert all_tests == combined_shards, (
        "All tests and combined shards disagree.\nNo shard:\n{}\n\nCombined:\n{}\n\n".format(
            '\n'.join(all_tests),
            '\n'.join(combined_shards)
        )
    )

    # -1 cannot collide with a real shard index; it carries the full list.
    shard_listings[-1] = all_tests

    return shard_listings
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def test_sharded_execution(self_test_exe: str, listings: Dict[int, List[str]]):
    """
    Runs the test binary and checks that the executed tests match the
    previously listed tests.

    `listings` maps shard-index => listed tests, with the full unsharded
    listing under the key -1.

    Checks that:
      * an unsharded run equals a run with a single shard,
      * the unsharded run equals the unsharded listing,
      * every shard executes exactly the tests that were listed for it.
    """
    all_tests = execute_tests(self_test_exe)
    big_shard_tests = execute_tests(self_test_exe, ['--shard-count', '1', '--shard-index', '0'])
    assert all_tests == big_shard_tests, (
        "No-sharding run does not match the run of big shard:\nNo shard:\n{}\n\nWith shard:\n{}\n".format(
            '\n'.join(all_tests),
            '\n'.join(big_shard_tests)
        )
    )

    assert listings[-1] == all_tests, (
        "Executed tests do not match the listed tests:\nListed:\n{}\n\nExecuted:\n{}\n".format(
            '\n'.join(listings[-1]),
            '\n'.join(all_tests)
        )
    )

    for shard_idx in range(number_of_shards):
        # Run the shard outside the assert so it still runs when asserts
        # are disabled (python -O).
        executed = execute_tests(self_test_exe, ['--shard-count', str(number_of_shards), '--shard-index', str(shard_idx)])
        assert listings[shard_idx] == executed, (
            "Shard {} executed different tests than it listed:\nListed:\n{}\n\nExecuted:\n{}\n".format(
                shard_idx,
                '\n'.join(listings[shard_idx]),
                '\n'.join(executed)
            )
        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def main():
    """
    Entry point: expects exactly one command line argument, the path to
    the test executable.

    Runs the listing checks first, then feeds their result into the
    execution checks. Returns 0 on success, or 2 when the script is
    invoked with the wrong number of arguments; failed checks propagate
    as exceptions.
    """
    args = sys.argv[1:]
    if len(args) != 1:
        # Say what is expected instead of failing with a bare unpacking error.
        print('Usage: {} <path-to-test-executable>'.format(sys.argv[0]),
              file=sys.stderr)
        return 2

    self_test_exe = args[0]
    listings = test_sharded_listing(self_test_exe)
    test_sharded_execution(self_test_exe, listings)
    return 0


if __name__ == '__main__':
    sys.exit(main())
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user