From c2819923c50ae666b31afb052a52c28850da6077 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Thu, 23 Sep 2021 22:59:36 -0400 Subject: [PATCH] Verilator_gantt now shows the predicted mtask times, eval times, and additional statistics. --- Changes | 3 +- bin/verilator_gantt | 170 ++- docs/guide/exe_verilator_gantt.rst | 40 +- docs/guide/figures/fig_gantt_min.png | Bin 44469 -> 37959 bytes docs/guide/simulating.rst | 2 +- include/verilated_threads.cpp | 26 +- include/verilated_threads.h | 29 +- src/V3EmitCModel.cpp | 67 +- src/V3Partition.cpp | 19 +- src/V3PartitionGraph.h | 3 + test_regress/t/t_gantt_io.dat | 1703 +++++++++++++------------- test_regress/t/t_gantt_io.out | 40 +- test_regress/t/t_gantt_io_vcd.out | 367 +++--- 13 files changed, 1387 insertions(+), 1082 deletions(-) diff --git a/Changes b/Changes index d3dd8f2a6..b11e2c554 100644 --- a/Changes +++ b/Changes @@ -11,7 +11,8 @@ contributors that suggested a given feature are shown in []. Thanks! Verilator 4.213 devel ========================== -* Include processor information in verilator_gantt data file. +* Verilator_gantt now shows the predicted mtask times, eval times, and additional statistics. +* Verilator_gantt data files now include processor information, to allow later processing. * Fix verilator_profcfunc profile accounting (#3115). * Fix display has no time units on class function (#3116). [Damien Pretet] * Fix removing if statement with side effect in condition (#3131). [Alexander Grobman] diff --git a/bin/verilator_gantt b/bin/verilator_gantt index 5fae5f46f..0a879588e 100755 --- a/bin/verilator_gantt +++ b/bin/verilator_gantt @@ -11,6 +11,8 @@ import statistics Threads = collections.defaultdict(lambda: {}) Mtasks = collections.defaultdict(lambda: {}) +Evals = collections.defaultdict(lambda: {}) +EvalLoops = collections.defaultdict(lambda: {}) Global = { 'args': {}, 'cpuinfo': collections.defaultdict(lambda: {}), @@ -29,8 +31,11 @@ def process(filename): def read_data(filename): with open(filename) as fh: re_prof = re.compile( - r'^VLPROF mtask\s(\d+)\sstart\s(\d+)\send\s(\d+)\selapsed\s(\d+)\spredict_time\s(\d+)\scpu\s(\d+)\son thread (\d+)' + r'^VLPROF mtask\s(\d+)\sstart\s(\d+)\selapsed\s(\d+)\spredict_start\s(\d+)\spredict_cost\s(\d+)\scpu\s(\d+)\son thread (\d+)' ) + re_eval = re.compile(r'^VLPROF eval\sstart\s(\d+)\selapsed\s(\d+)') + re_loop = re.compile( + r'^VLPROF eval_loop\sstart\s(\d+)\selapsed\s(\d+)') re_arg1 = re.compile(r'VLPROF arg\s+(\S+)\+([0-9.])\s*') re_arg2 = re.compile(r'VLPROF arg\s+(\S+)\s+([0-9.])\s*$') re_stat = re.compile(r'VLPROF stat\s+(\S+)\s+([0-9.]+)') @@ -44,9 +49,10 @@ def read_data(filename): match = re_prof.match(line) mtask = int(match.group(1)) start = int(match.group(2)) - end = int(match.group(3)) - elapsed_time = int(match.group(4)) - predict_time = int(match.group(5)) + elapsed_time = int(match.group(3)) + end = start + elapsed_time + predict_start = int(match.group(4)) + predict_cost = int(match.group(5)) cpu = int(match.group(6)) thread = int(match.group(7)) if start not in Threads[thread]: @@ -54,12 +60,28 @@ def read_data(filename): Threads[thread][start]['mtask'] = mtask Threads[thread][start]['end'] = end Threads[thread][start]['cpu'] = cpu + Threads[thread][start]['predict_start'] = predict_start + Threads[thread][start]['predict_cost'] = predict_cost if 'elapsed' not in Mtasks[mtask]: Mtasks[mtask] = {'end': 0, 'elapsed': 0} + Mtasks[mtask]['thread'] = thread Mtasks[mtask]['elapsed'] += elapsed_time - Mtasks[mtask]['predict_cost'] = predict_time + Mtasks[mtask]['predict_start'] = predict_start + Mtasks[mtask]['predict_cost'] = predict_cost Mtasks[mtask]['end'] = max(Mtasks[mtask]['end'], end) + elif re_eval.match(line): + match = re_eval.match(line) + start = int(match.group(1)) + elapsed_time = int(match.group(2)) + Evals[start]['start'] = start + Evals[start]['end'] = start + elapsed_time + elif re_loop.match(line): + match = re_loop.match(line) + start = int(match.group(1)) + elapsed_time = int(match.group(2)) + EvalLoops[start]['start'] = start + EvalLoops[start]['end'] = start + elapsed_time elif re.match(r'^VLPROFTHREAD', line): None # pylint: disable=pointless-statement elif re_arg1.match(line): @@ -120,14 +142,21 @@ def report(): Global['cpus'][cpu] = {'cpu_time': 0} Global['cpus'][cpu]['cpu_time'] += elapsed - mt_mtask_time = 0 + measured_mt_mtask_time = 0 + predict_mt_mtask_time = 0 long_mtask_time = 0 - last_end = 0 + measured_last_end = 0 + predict_last_end = 0 for mtask in Mtasks: - mt_mtask_time += Mtasks[mtask]['elapsed'] - last_end = max(last_end, Mtasks[mtask]['end']) + measured_mt_mtask_time += Mtasks[mtask]['elapsed'] + predict_mt_mtask_time += Mtasks[mtask]['predict_cost'] + measured_last_end = max(measured_last_end, Mtasks[mtask]['end']) + predict_last_end = max( + predict_last_end, + Mtasks[mtask]['predict_start'] + Mtasks[mtask]['predict_cost']) long_mtask_time = max(long_mtask_time, Mtasks[mtask]['elapsed']) - Global['last_end'] = last_end + Global['measured_last_end'] = measured_last_end + Global['predict_last_end'] = predict_last_end report_graph() @@ -145,19 +174,30 @@ def report(): ncpus = len(Global['cpus']) print(" Total cpus used = %d" % ncpus) print(" Total yields = %d" % int(Global['stats']['yields'])) - print(" Total eval time = %d rdtsc ticks" % Global['last_end']) + print(" Total eval time = %d rdtsc ticks" % + Global['measured_last_end']) print(" Longest mtask time = %d rdtsc ticks" % long_mtask_time) - print(" All-thread mtask time = %d rdtsc ticks" % mt_mtask_time) - long_efficiency = long_mtask_time / (Global.get('last_end', 1)) + print(" All-thread mtask time = %d rdtsc ticks" % + measured_mt_mtask_time) + long_efficiency = long_mtask_time / (Global.get('measured_last_end', 1) or 1) print(" Longest-thread efficiency = %0.1f%%" % (long_efficiency * 100.0)) - mt_efficiency = mt_mtask_time / (Global.get('last_end', 1) * nthreads or 1) + mt_efficiency = measured_mt_mtask_time / ( + Global.get('measured_last_end', 1) * nthreads or 1) print(" All-thread efficiency = %0.1f%%" % (mt_efficiency * 100.0)) print(" All-thread speedup = %0.1f" % (mt_efficiency * nthreads)) if Global['rdtsc_cycle_time'] > 0: - ut = mt_mtask_time / Global['rdtsc_cycle_time'] - print("tot_mtask_cpu=" + mt_mtask_time + " cyc=" + + ut = measured_mt_mtask_time / Global['rdtsc_cycle_time'] + print("tot_mtask_cpu=" + measured_mt_mtask_time + " cyc=" + Global['rdtsc_cycle_time'] + " ut=" + ut) + predict_mt_efficiency = predict_mt_mtask_time / ( + Global.get('predict_last_end', 1) * nthreads or 1) + print("\nPrediction (what Verilator used for scheduling):") + print(" All-thread efficiency = %0.1f%%" % + (predict_mt_efficiency * 100.0)) + print(" All-thread speedup = %0.1f" % + (predict_mt_efficiency * nthreads)) + p2e_ratios = [] min_p2e = 1000000 min_mtask = None @@ -253,7 +293,7 @@ def report_cpus(): def report_graph(): time_per = Args.scale if time_per == 0: - time_per = Global['last_end'] / 40 # Start with 40 columns + time_per = Global['measured_last_end'] / 40 # Start with 40 columns while time_per > 10: (graph, conflicts) = _make_graph(time_per) if not conflicts: @@ -270,8 +310,9 @@ def report_graph(): print(" Legend: One character width = %s rdtsc ticks" % time_per) print(" Legend: '&' = multiple mtasks in this period (character width)") - scale = " <-%d rdtsc total" % Global['last_end'] - for col in range(len(scale), int(0.99 + (Global['last_end'] / time_per))): # pylint: disable=unused-variable + scale = " <-%d rdtsc total" % Global['measured_last_end'] + for col in range(len(scale), + int(0.99 + (Global['measured_last_end'] / time_per))): # pylint: disable=unused-variable scale += "-" print(" " + scale + "->") @@ -361,30 +402,39 @@ def write_vcd(filename): 'values': collections.defaultdict(lambda: {}), # {