From e990415ee09ec55adc2cf37fba720f180dcecef2 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Thu, 19 Sep 2024 19:28:07 -0400 Subject: [PATCH] Tests: Improve driver performance with forkserver --- test_regress/driver.py | 380 +++++++++++++++++++++-------------------- 1 file changed, 197 insertions(+), 183 deletions(-) diff --git a/test_regress/driver.py b/test_regress/driver.py index daac67150..05f838dd2 100755 --- a/test_regress/driver.py +++ b/test_regress/driver.py @@ -48,6 +48,8 @@ All_Scenarios = { test = None Arg_Tests = [] Arg_Driver_Verilator_Flags = [] +Quitting = False +Vltmt_Threads = 3 # So an 'import vltest_bootstrap' inside test files will do nothing sys.modules['vltest_bootstrap'] = {} @@ -223,14 +225,16 @@ class Capabilities: class Forker: - class Process: + class Job: - def __init__(self, _id, name, scenario, rerun_skipping, run_pre_start, run_on_start, - run_on_finish): + def __init__(self, _id, name, scenario, args, quiet, rerun_skipping, run_pre_start, + run_on_start, run_on_finish): self.fail_max_skip = False self.id = _id self.name = name self.scenario = scenario + self.args = args + self.quiet = quiet self.rerun_skipping = rerun_skipping self.run_pre_start = run_pre_start self.run_on_start = run_on_start @@ -248,8 +252,8 @@ class Forker: def __init__(self, max_processes): self._max_processes = max_processes self._id_next = 0 - self._left = collections.deque() # deque of Process - self._running = {} # key of pid, value of Process + self._left = collections.deque() # deque of Job + self._running = {} # key of pid, value of Job def is_any_left(self) -> bool: return self.num_running() > 0 or (len(self._left) > 0 and not Quitting) @@ -257,21 +261,27 @@ class Forker: def max_proc(self, n: int) -> None: self._max_processes = n - def poll(self) -> None: + def poll(self) -> bool: + """Run threads, returning if more work to do (if False, sleep)""" # We don't use SIGCHLD as conflicted with other handler, instead just poll completed = [] # Need two passes to avoid changing the list we are iterating nrunning = 0 + more_now = False for process in self._running.values(): if process.exitcode is not None: completed.append(process) + more_now = True else: nrunning += 1 - for process in completed: - self._finished(process) + # Start new work now, so running in background while we then collect completions while len(self._left) and nrunning < self._max_processes and not Quitting: process = self._left.popleft() self._run(process) nrunning += 1 + more_now = True + for process in completed: + self._finished(process) + return more_now def running(self) -> list: return self._running.values() @@ -279,15 +289,18 @@ class Forker: def num_running(self) -> int: return len(self._running) - def schedule(self, name, scenario, rerun_skipping, run_pre_start, run_on_start, run_on_finish): + def schedule(self, name, scenario, args, quiet, rerun_skipping, run_pre_start, run_on_start, + run_on_finish): # print("-Forker::schedule: [" + name + "]") - process = Forker.Process(self._id_next, - name=name, - scenario=scenario, - rerun_skipping=rerun_skipping, - run_pre_start=run_pre_start, - run_on_start=run_on_start, - run_on_finish=run_on_finish) + process = Forker.Job(self._id_next, + name=name, + scenario=scenario, + args=args, + quiet=quiet, + rerun_skipping=rerun_skipping, + run_pre_start=run_pre_start, + run_on_start=run_on_start, + run_on_finish=run_on_finish) self._id_next += 1 self._left.append(process) @@ -300,8 +313,10 @@ class Forker: # print("-Forker: [" + process.name + "] run_pre_start") process.run_pre_start(process) - ctx = multiprocessing.get_context('fork') - process.mprocess = ctx.Process(target=lambda: self.child_start(process)) + ctx = multiprocessing.get_context('forkserver') + process.mprocess = ctx.Process( # + target=forker.child_start, # pylint: disable=used-before-assignment + args=(process, )) process.mprocess.start() # print("-Forker: [" + process.name + "] RUNNING pid=" + str(process.pid)) @@ -355,6 +370,8 @@ class Runner: self.left_cnt += 1 forker.schedule(name=py_filename, scenario=scenario, + args=Args, + quiet=self.quiet, rerun_skipping=rerun_skipping, run_pre_start=self._run_pre_start_static, run_on_start=self._run_on_start_static, @@ -380,15 +397,15 @@ class Runner: @staticmethod def _run_on_start_static(process) -> None: - Runner.runner._run_on_start(process) # pylint: disable=protected-access - - def _run_on_start(self, process) -> None: # Running in context of child, so can't pass data to parent directly - if self.quiet: + if process.quiet: sys.stdout = open(os.devnull, 'w') # pylint: disable=R1732,unspecified-encoding sys.stderr = open(os.devnull, 'w') # pylint: disable=R1732,unspecified-encoding print("=" * 70) + global Args + Args = process.args + global test test = VlTest(py_filename=process.name, scenario=process.scenario, @@ -396,9 +413,9 @@ class Runner: test.oprint("=" * 50) test._prep() if process.rerun_skipping: - print(" ---------- Earlier logfiles below; test was rerunnable = 0\n") - os.system("cat $test->{obj_dir}/*.log") - print(" ---------- Earlier logfiles above; test was rerunnable = 0\n") + print(" ---------- Earlier logfiles below; test was rerunnable = False\n") + os.system("cat " + test.obj_dir + "/*.log") + print(" ---------- Earlier logfiles above; test was rerunnable = False\n") elif process.fail_max_skip: test.skip("Too many test failures; exceeded --fail-max") else: @@ -457,10 +474,11 @@ class Runner: self.print_summary(force=True) # Wait for all children to finish while forker.is_any_left(): - forker.poll() - if not interactive_debugger: + more_now = forker.poll() + if not Args.interactive_debugger: self.print_summary(force=False) - time.sleep(0.1) + if not more_now: + time.sleep(0.1) self.report(None) self.report(self.driver_log_filename) @@ -505,7 +523,7 @@ class Runner: @staticmethod def _py_filename_adjust(py_filename: str, tdir_def: str) -> list: # Return (py_filename, t_dir) - for tdir in Test_Dirs: # pylint: disable=redefined-outer-name + for tdir in Args.test_dirs: # pylint: disable=redefined-outer-name # t_dir used both absolutely and under obj_dir try_py_filename = tdir + "/" + os.path.basename(py_filename) if os.path.exists(try_py_filename): @@ -517,7 +535,7 @@ class Runner: return (py_filename, os.path.abspath(tdir_def)) def sprint_summary(self) -> str: - delta = time.time() - Start + delta = time.time() - Start # pylint: disable=used-before-assignment # Fudge of 120% works out about right so ETA correctly predicts completion time eta = 1.2 * ((self.all_cnt * (delta / ((self.all_cnt - self.left_cnt) + 0.001))) - delta) if delta < 10: @@ -542,7 +560,7 @@ class Runner: def _manual_args(self) -> str: # Return command line with scenarios stripped out = [] - for oarg in Orig_ARGV_Sw: + for oarg in Args.orig_argv_sw: showarg = True for val in All_Scenarios.values(): for allscarg in val: @@ -623,7 +641,7 @@ class VlTest: self.vlt_all = self.vlt or self.vltmt # Any Verilator scenario (self.py_filename, self.t_dir) = Runner._py_filename_adjust(self.py_filename, ".") - for tdir in Test_Dirs: # pylint: disable=redefined-outer-name + for tdir in Args.test_dirs: # pylint: disable=redefined-outer-name # t_dir used both absolutely and under obj_dir self.t_dir = None if os.path.exists(tdir + "/" + self.name + ".py"): @@ -1682,7 +1700,7 @@ class VlTest: data = rawbuf[0:got] if tee: sys.stdout.write(data.decode('latin-1')) - if interactive_debugger: + if Args.interactive_debugger: sys.stdout.flush() if logfh: logfh.write(data) @@ -1728,8 +1746,8 @@ class VlTest: # Read the log file a couple of times to allow for NFS delays if check_finished: - for tryn in range(self.tries() - 1, -1, -1): - if tryn != self.tries() - 1: + for tryn in range(Args.log_retries - 1, -1, -1): + if tryn != Args.log_retries - 1: time.sleep(1) moretry = tryn != 0 if not self._run_log_try(logfile, check_finished, moretry): @@ -2122,8 +2140,8 @@ class VlTest: def files_identical(self, fn1: str, fn2: str, is_logfile=False) -> None: """Test if two files have identical contents""" - for tryn in range(self.tries() - 1, -1, -1): - if tryn != self.tries() - 1: + for tryn in range(Args.log_retries, -1, -1): + if tryn != Args.log_retries - 1: time.sleep(1) moretry = tryn != 0 if not self._files_identical_try( @@ -2139,7 +2157,7 @@ class VlTest: f1 = None if not moretry: self.error("Files_identical file does not exist: " + fn1) - return True + return True # Retry try: f2 = open( # pylint: disable=consider-using-with fn2, 'r', encoding='latin-1', newline='\n') @@ -2148,7 +2166,7 @@ class VlTest: if not moretry: self.copy_if_golden(fn1, fn2) self.error("Files_identical file does not exist: " + fn2) - return True + return True # Retry again = self._files_identical_reader(f1, f2, fn1=fn1, @@ -2213,7 +2231,7 @@ class VlTest: if l1 != l2: # print(" clnGOT="+pformat(l1s)+"\n clnEXP="+pformat(l2s)) if moretry: - return True + return True # Retry self.error_keep_going("Line " + str(lineno_m1) + " miscompares; " + fn1 + " != " + fn2) for c in range(min(len(l1), len(l2))): @@ -2231,9 +2249,9 @@ class VlTest: else: print("To update reference: HARNESS_UPDATE_GOLDEN=1 {command} or --golden", file=sys.stderr) - return False + return False # No retry - bad - return False # Good + return False # No retry - good def files_identical_sorted(self, fn1: str, fn2: str, is_logfile=False) -> None: """Test if two files, after sorting both, have identical contents""" @@ -2379,16 +2397,6 @@ class VlTest: VlTest._cached_cfg_with_ccache = bool(re.match(r'OBJCACHE \?= ccache', mkf)) return VlTest._cached_cfg_with_ccache - @staticmethod - def tries() -> int: - # Number of retries when reading logfiles, generally only need many - # retries when system is busy running a lot of tests - if not forker.running: - return 2 - if len(Arg_Tests) > 3: - return 7 - return 2 - def glob_some(self, pattern: str) -> list: """Return list of filenames matching a glob, with at least one match required.""" files = glob.glob(pattern) @@ -2653,156 +2661,162 @@ def run_them() -> None: ###################################################################### ###################################################################### -# Globals +# Main -os.environ['PYTHONUNBUFFERED'] = "1" +if __name__ == '__main__': + os.environ['PYTHONUNBUFFERED'] = "1" -if ('VERILATOR_ROOT' not in os.environ) and os.path.isfile('../bin/verilator'): - os.environ['VERILATOR_ROOT'] = os.getcwd() + "/.." -if 'MAKE' not in os.environ: - os.environ['MAKE'] = "make" -if 'CXX' not in os.environ: - os.environ['CXX'] = "c++" -if 'TEST_REGRESS' in os.environ: - sys.exit("%Error: TEST_REGRESS environment variable is already set") -os.environ['TEST_REGRESS'] = os.getcwd() + if ('VERILATOR_ROOT' not in os.environ) and os.path.isfile('../bin/verilator'): + os.environ['VERILATOR_ROOT'] = os.getcwd() + "/.." + if 'MAKE' not in os.environ: + os.environ['MAKE'] = "make" + if 'CXX' not in os.environ: + os.environ['CXX'] = "c++" + if 'TEST_REGRESS' in os.environ: + sys.exit("%Error: TEST_REGRESS environment variable is already set") + os.environ['TEST_REGRESS'] = os.getcwd() -forker = None -Start = time.time() -Vltmt_Threads = 3 -_Parameter_Next_Level = None -Quitting = False + forker = None + Start = time.time() + _Parameter_Next_Level = None + def sig_int(signum, env) -> None: # pylint: disable=unused-argument + global Quitting + if Quitting: + sys.exit("\nQuitting (immediately)...") + print("\nQuitting... (send another interrupt signal for immediate quit)") + Quitting = True + if forker: + forker.kill_tree_all() -def sig_int(signum, env) -> None: # pylint: disable=unused-argument - global Quitting - if Quitting: - sys.exit("\nQuitting (immediately)...") - print("\nQuitting... (send another interrupt signal for immediate quit)") - Quitting = True - if forker: - forker.kill_tree_all() + signal.signal(signal.SIGINT, sig_int) + #--------------------------------------------------------------------- -signal.signal(signal.SIGINT, sig_int) + parser = argparse.ArgumentParser( + allow_abbrev=False, + formatter_class=argparse.RawDescriptionHelpFormatter, + description="""Run Verilator regression tests""", + epilog="""driver.py invokes Verilator or another simulator on each test file. + See docs/internals.rst in the distribution for more information. -Orig_ARGV_Sw = [] -for arg in sys.argv: - if re.match(r'^-', arg) and not re.match(r'^-j', arg): - Orig_ARGV_Sw.append(arg) + Copyright 2024-2024 by Wilson Snyder. This program is free software; you + can redistribute it and/or modify it under the terms of either the GNU + Lesser General Public License Version 3 or the Perl Artistic License + Version 2.0. -#--------------------------------------------------------------------- + SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0""") -parser = argparse.ArgumentParser( - allow_abbrev=False, - formatter_class=argparse.RawDescriptionHelpFormatter, - description="""Run Verilator regression tests""", - epilog="""driver.py invokes Verilator or another simulator on each test file. -See docs/internals.rst in the distribution for more information. + parser.add_argument('--benchmark', action='store', help='enable benchmarking') + parser.add_argument('--debug', action='store_const', const=9, help='enable debug') + # --debugi: see _parameter() + parser.add_argument('--fail-max', + action='store', + default=None, + help='run Verilator executable with gdb') + parser.add_argument('--gdb', action='store_true', help='run Verilator executable with gdb') + parser.add_argument('--gdbbt', + action='store_true', + help='run Verilated executable with gdb and backtrace') + parser.add_argument('--gdbsim', action='store_true', help='run Verilated executable with gdb') + parser.add_argument('--golden', '--gold', action='store_true', help='update golden .out files') + parser.add_argument('--hashset', action='store', help='split tests based on /') + parser.add_argument('--jobs', + '-j', + action='store', + default=0, + type=int, + help='parallel job count (0=cpu count)') + parser.add_argument('--quiet', + action='store_true', + help='suppress output except failures and progress') + parser.add_argument('--rerun', action='store_true', help='rerun all tests that fail') + parser.add_argument('--rr', action='store_true', help='run Verilator executable with rr') + parser.add_argument('--rrsim', action='store_true', help='run Verilated executable with rr') + parser.add_argument('--sanitize', action='store_true', help='run address sanitizer') + parser.add_argument('--site', + action='store_true', + help='include VERILATOR_TEST_SITE test list') + parser.add_argument('--stop', action='store_true', help='stop on the first error') + parser.add_argument('--trace', action='store_true', help='enable simulator waveform tracing') + parser.add_argument('--verbose', + action='store_true', + help='compile and run test in verbose mode') + parser.add_argument( + '--verilation', # -no-verilation undocumented debugging + action='store_true', + default=True, + help="don't run verilator compile() phase") + parser.add_argument('--verilated-debug', + action='store_true', + help='enable Verilated executable debug') + ## Scenarios + for scen, v in All_Scenarios.items(): + parser.add_argument('--' + scen, + dest='scenarios', + action='append_const', + const=scen, + help='scenario-enable ' + scen) -Copyright 2024-2024 by Wilson Snyder. This program is free software; you -can redistribute it and/or modify it under the terms of either the GNU -Lesser General Public License Version 3 or the Perl Artistic License -Version 2.0. + (Args, rest) = parser.parse_known_intermixed_args() -SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0""") + for arg in rest: + _parameter(arg) -parser.add_argument('--benchmark', action='store', help='enable benchmarking') -parser.add_argument('--debug', action='store_const', const=9, help='enable debug') -# --debugi: see _parameter() -parser.add_argument('--fail-max', - action='store', - default=None, - help='run Verilator executable with gdb') -parser.add_argument('--gdb', action='store_true', help='run Verilator executable with gdb') -parser.add_argument('--gdbbt', - action='store_true', - help='run Verilated executable with gdb and backtrace') -parser.add_argument('--gdbsim', action='store_true', help='run Verilated executable with gdb') -parser.add_argument('--golden', '--gold', action='store_true', help='update golden .out files') -parser.add_argument('--hashset', action='store', help='split tests based on /') -parser.add_argument('--jobs', - '-j', - action='store', - default=0, - type=int, - help='parallel job count (0=cpu count)') -parser.add_argument('--quiet', - action='store_true', - help='suppress output except failures and progress') -parser.add_argument('--rerun', action='store_true', help='rerun all tests that fail') -parser.add_argument('--rr', action='store_true', help='run Verilator executable with rr') -parser.add_argument('--rrsim', action='store_true', help='run Verilated executable with rr') -parser.add_argument('--sanitize', action='store_true', help='run address sanitizer') -parser.add_argument('--site', action='store_true', help='include VERILATOR_TEST_SITE test list') -parser.add_argument('--stop', action='store_true', help='stop on the first error') -parser.add_argument('--trace', action='store_true', help='enable simulator waveform tracing') -parser.add_argument('--verbose', action='store_true', help='compile and run test in verbose mode') -parser.add_argument( - '--verilation', # -no-verilation undocumented debugging - action='store_true', - default=True, - help="don't run verilator compile() phase") -parser.add_argument('--verilated-debug', - action='store_true', - help='enable Verilated executable debug') -## Scenarios -for scen, v in All_Scenarios.items(): - parser.add_argument('--' + scen, - dest='scenarios', - action='append_const', - const=scen, - help='scenario-enable ' + scen) + if Args.debug: + Arg_Driver_Verilator_Flags.append("--debug --no-skip-identical") + logging.basicConfig(level=logging.DEBUG) + logging.info("In driver.py, ARGV=" + ' '.join(sys.argv)) -(Args, rest) = parser.parse_known_intermixed_args() + # Use Args for some global information, so gets passed to forked process + Args.interactive_debugger = Args.gdb or Args.gdbsim or Args.rr or Args.rrsim + if Args.jobs > 1 and Args.interactive_debugger: + sys.exit("%Error: Unable to use -j > 1 with --gdb* and --rr* options") -for arg in rest: - _parameter(arg) + if Args.golden: + os.environ['HARNESS_UPDATE_GOLDEN'] = '1' + if Args.jobs == 0: + Args.jobs = 1 if Args.interactive_debugger else calc_jobs() + if not Args.scenarios: + Args.scenarios = [] + Args.scenarios.append('dist') + Args.scenarios.append('vlt') -if Args.debug: - Arg_Driver_Verilator_Flags.append("--debug --no-skip-identical") - logging.basicConfig(level=logging.DEBUG) - logging.info("In driver.py, ARGV=" + ' '.join(sys.argv)) + Args.orig_argv_sw = [] + for arg in sys.argv: + if re.match(r'^-', arg) and not re.match(r'^-j', arg): + Args.orig_argv_sw.append(arg) -interactive_debugger = Args.gdb or Args.gdbsim or Args.rr or Args.rrsim -if Args.jobs > 1 and interactive_debugger: - sys.exit("%Error: Unable to use -j > 1 with --gdb* and --rr* options") + Args.test_dirs = ["t"] + if 'VERILATOR_TESTS_SITE' in os.environ: + if Args.site or len(Arg_Tests) >= 1: + for tdir in os.environ['VERILATOR_TESTS_SITE'].split(':'): + Args.test_dirs.append(tdir) -if Args.golden: - os.environ['HARNESS_UPDATE_GOLDEN'] = '1' -if Args.jobs == 0: - Args.jobs = 1 if interactive_debugger else calc_jobs() -if not Args.scenarios: - Args.scenarios = [] - Args.scenarios.append('dist') - Args.scenarios.append('vlt') + if not Arg_Tests: # Run everything + uniq = {} + for tdir in Args.test_dirs: + # Uniquify by inode, so different paths to same place get combined + stats = os.stat(tdir) + if stats.st_ino not in uniq: + uniq[stats.st_ino] = 1 + Arg_Tests += sorted(glob.glob(tdir + "/t_*.py")) + if Args.hashset: + _calc_hashset() -forker = Forker(Args.jobs) + # Number of retries when reading logfiles, generally only need many + # retries when system is busy running a lot of tests + Args.log_retries = 10 if (len(Arg_Tests) > 3) else 2 -Test_Dirs = ["t"] -if 'VERILATOR_TESTS_SITE' in os.environ: - if Args.site or len(Arg_Tests) >= 1: - for tdir in os.environ['VERILATOR_TESTS_SITE'].split(':'): - Test_Dirs.append(tdir) + forker = Forker(Args.jobs) -if not Arg_Tests: # Run everything - uniq = {} - for tdir in Test_Dirs: - # Uniquify by inode, so different paths to same place get combined - stats = os.stat(tdir) - if stats.st_ino not in uniq: - uniq[stats.st_ino] = 1 - Arg_Tests += sorted(glob.glob(tdir + "/t_*.py")) -if Args.hashset: - _calc_hashset() + if len(Arg_Tests) >= 2 and Args.jobs >= 2: + # Read supported into master process, so don't call every subprocess + Capabilities.warmup_cache() + # Without this tests such as t_debug_sigsegv_bt_bad.py will occasionally + # block on input and cause a SIGSTOP, then a "fg" was needed to resume testing. + print("== Many jobs; redirecting STDIN", file=sys.stderr) + # + sys.stdin = open("/dev/null", 'r', encoding="utf8") # pylint: disable=consider-using-with -if len(Arg_Tests) >= 2 and Args.jobs >= 2: - # Read supported into master process, so don't call every subprocess - Capabilities.warmup_cache() - # Without this tests such as t_debug_sigsegv_bt_bad.py will occasionally - # block on input and cause a SIGSTOP, then a "fg" was needed to resume testing. - print("== Many jobs; redirecting STDIN", file=sys.stderr) - # - sys.stdin = open("/dev/null", 'r', encoding="utf8") # pylint: disable=consider-using-with - -run_them() + run_them()