verilator/bin/verilator_gantt

494 lines
19 KiB
Plaintext
Raw Normal View History

2021-09-08 12:16:31 +00:00
#!/usr/bin/env python3
# pylint: disable=C0103,C0114,C0116,C0301,R0914,R0912,R0915,W0511,eval-used
######################################################################
2021-09-08 12:16:31 +00:00
import argparse
import collections
import math
import re
import statistics
# from pprint import pprint
# Profile tables filled in by read_data().  Each one hands out a fresh empty
# dict the first time a key is touched.
Threads = collections.defaultdict(dict)  # {<thread>}{<start>} = mtask record
Mtasks = collections.defaultdict(dict)  # {<mtask>} = accumulated mtask info
Evals = collections.defaultdict(dict)  # {<start>} = {'start', 'end'}
EvalLoops = collections.defaultdict(dict)  # {<start>} = {'start', 'end'}

# Everything else shared between the read, report and VCD passes.
Global = dict(
    args={},
    cpuinfo=collections.defaultdict(dict),
    rdtsc_cycle_time=0,
    stats={},
)
2021-09-08 12:16:31 +00:00
######################################################################
2021-09-08 12:16:31 +00:00
def process(filename):
    """Load the profile data in *filename*, then print the text report."""
    read_data(filename)
    report()
def _record_mtask(match):
    """Store one matched ``VLPROF mtask`` line in Threads and Mtasks.

    match: match object produced by the mtask pattern in read_data().  Its
    seven groups are, in order: mtask, start, elapsed, predict_start,
    predict_cost, cpu and thread, all decimal integers.
    """
    (mtask, start, elapsed_time, predict_start, predict_cost, cpu,
     thread) = [int(field) for field in match.groups()]
    end = start + elapsed_time

    # One record per (thread, start time); a repeated start updates in place
    Threads[thread].setdefault(start, {}).update({
        'mtask': mtask,
        'end': end,
        'cpu': cpu,
        'predict_start': predict_start,
        'predict_cost': predict_cost,
    })

    # Per-mtask totals accumulate over every execution of that mtask
    if 'elapsed' not in Mtasks[mtask]:
        Mtasks[mtask] = {'end': 0, 'elapsed': 0}
    info = Mtasks[mtask]
    info['thread'] = thread
    info['elapsed'] += elapsed_time
    info['predict_start'] = predict_start
    info['predict_cost'] = predict_cost
    info['end'] = max(info['end'], end)


def read_data(filename):
    """Parse the profile dump *filename* into the module-level tables.

    Recognized lines fill Threads and Mtasks (``VLPROF mtask``), Evals
    (``VLPROF eval``), EvalLoops (``VLPROF eval_loop``), Global['args'],
    Global['stats'], Global['cpuinfo'] (``VLPROFPROC``) and
    Global['rdtsc_cycle_time'] (stored as an int).  ``VLPROFTHREAD`` and
    ``#`` lines are ignored; any other line is echoed when Args.debug is set.
    """
    re_prof = re.compile(
        r'^VLPROF mtask\s(\d+)\sstart\s(\d+)\selapsed\s(\d+)\spredict_start\s(\d+)\spredict_cost\s(\d+)\scpu\s(\d+)\son thread (\d+)'
    )
    re_eval = re.compile(r'^VLPROF eval\sstart\s(\d+)\selapsed\s(\d+)')
    re_loop = re.compile(
        r'^VLPROF eval_loop\sstart\s(\d+)\selapsed\s(\d+)')
    re_arg1 = re.compile(r'VLPROF arg\s+(\S+)\+([0-9.]*)\s*')
    re_arg2 = re.compile(r'VLPROF arg\s+(\S+)\s+([0-9.]*)\s*$')
    re_stat = re.compile(r'VLPROF stat\s+(\S+)\s+([0-9.]+)')
    re_time = re.compile(r'rdtsc time = (\d+) ticks')
    re_proc_cpu = re.compile(r'VLPROFPROC processor\s*:\s*(\d+)\s*$')
    re_proc_dat = re.compile(r'VLPROFPROC ([a-z_ ]+)\s*:\s*(.*)$')

    with open(filename) as fh:
        # Processor number of the most recent "VLPROFPROC processor" line.
        # Kept separate from the cpu field of mtask records, and compared
        # against None so that processor 0 is not mistaken for "unset".
        proc_cpu = None

        for line in fh:
            # TODO -- this is parsing text printed by a client.
            # Really, verilator proper should generate this
            # if it's useful...
            # Checked for every line, independently of the dispatch below.
            match = re_time.match(line)
            if match:
                Global['rdtsc_cycle_time'] = int(match.group(1))

            match = re_prof.match(line)
            if match:
                _record_mtask(match)
                continue

            match = re_eval.match(line)
            if match:
                start = int(match.group(1))
                Evals[start]['start'] = start
                Evals[start]['end'] = start + int(match.group(2))
                continue

            match = re_loop.match(line)
            if match:
                start = int(match.group(1))
                EvalLoops[start]['start'] = start
                EvalLoops[start]['end'] = start + int(match.group(2))
                continue

            if line.startswith('VLPROFTHREAD'):
                continue

            # "name+value" form is tried before "name value" form
            match = re_arg1.match(line) or re_arg2.match(line)
            if match:
                Global['args'][match.group(1)] = match.group(2)
                continue

            match = re_stat.match(line)
            if match:
                Global['stats'][match.group(1)] = match.group(2)
                continue

            match = re_proc_cpu.match(line)
            if match:
                proc_cpu = int(match.group(1))
                continue

            if proc_cpu is not None:
                match = re_proc_dat.match(line)
                if match:
                    # "model name" -> "model_name", trailing blanks dropped
                    term = re.sub(r'\s+', '_', match.group(1).rstrip())
                    Global['cpuinfo'][proc_cpu][term] = match.group(2).rstrip()
                    continue

            if line.startswith('#'):
                continue

            if Args.debug:
                print("-Unk: %s" % line)
def re_match_result(regexp, line, result_to):
    """Return ``re.match(regexp, line)``: a match object, or None.

    result_to is accepted but never read or modified; callers must use the
    return value.
    """
    return re.match(regexp, line)
2021-09-08 12:16:31 +00:00
######################################################################
2021-09-08 12:16:31 +00:00
def report():
    """Print the text analysis of the profile loaded by read_data().

    Reads Threads, Mtasks, Evals, EvalLoops and Global.  As side effects it
    stores Global['cpus'], Global['measured_last_end'] and
    Global['predict_last_end'], bumps any zero Mtasks[..]['predict_cost'] of
    an mtask with elapsed time to 1, and calls report_cpus().
    """
    print("Verilator Gantt report")

    print("\nArgument settings:")
    for arg in sorted(Global['args'].keys()):
        plus = "+" if re.match(r'^\+', arg) else " "
        print(" %s%s%s" % (arg, plus, Global['args'][arg]))

    nthreads = len(Threads)

    # Total time spent in mtasks on each CPU
    Global['cpus'] = {}
    for thread in Threads:
        for start in Threads[thread]:
            cpu = Threads[thread][start]['cpu']
            elapsed = Threads[thread][start]['end'] - start
            if cpu not in Global['cpus']:
                Global['cpus'][cpu] = {'cpu_time': 0}
            Global['cpus'][cpu]['cpu_time'] += elapsed

    measured_mt_mtask_time = 0
    predict_mt_mtask_time = 0
    long_mtask_time = 0
    measured_last_end = 0
    predict_last_end = 0
    for mtask in Mtasks:
        measured_mt_mtask_time += Mtasks[mtask]['elapsed']
        predict_mt_mtask_time += Mtasks[mtask]['predict_cost']
        measured_last_end = max(measured_last_end, Mtasks[mtask]['end'])
        predict_last_end = max(
            predict_last_end,
            Mtasks[mtask]['predict_start'] + Mtasks[mtask]['predict_cost'])
        long_mtask_time = max(long_mtask_time, Mtasks[mtask]['elapsed'])
    Global['measured_last_end'] = measured_last_end
    Global['predict_last_end'] = predict_last_end

    # If we know cycle time in the same (rdtsc) units,
    # this will give us an actual utilization number,
    # (how effectively we keep the cores busy.)
    #
    # It also gives us a number we can compare against
    # serial mode, to estimate the overhead of data sharing,
    # which will show up in the total elapsed time.  (Overhead
    # of synchronization and scheduling should not.)

    print("\nAnalysis:")
    print(" Total threads = %d" % nthreads)
    print(" Total mtasks = %d" % len(Mtasks))
    ncpus = len(Global['cpus'])
    print(" Total cpus used = %d" % ncpus)
    # The profile may carry no "yields" stat; report zero rather than fail
    print(" Total yields = %d" % int(Global['stats'].get('yields', 0)))
    print(" Total evals = %d" % len(Evals))
    print(" Total eval loops = %d" % len(EvalLoops))
    print(" Total eval time = %d rdtsc ticks" %
          Global['measured_last_end'])
    print(" Longest mtask time = %d rdtsc ticks" % long_mtask_time)
    print(" All-thread mtask time = %d rdtsc ticks" %
          measured_mt_mtask_time)

    # "or 1" keeps the divisions defined when nothing was measured
    long_efficiency = long_mtask_time / (measured_last_end or 1)
    print(" Longest-thread efficiency = %0.1f%%" % (long_efficiency * 100.0))
    mt_efficiency = measured_mt_mtask_time / (measured_last_end * nthreads
                                              or 1)
    print(" All-thread efficiency = %0.1f%%" % (mt_efficiency * 100.0))
    print(" All-thread speedup = %0.1f" % (mt_efficiency * nthreads))

    # Coerce: the value may have been stored as the text of a regex group
    cycle_time = int(Global['rdtsc_cycle_time'])
    if cycle_time > 0:
        ut = measured_mt_mtask_time / cycle_time
        print("tot_mtask_cpu=%d cyc=%d ut=%s" %
              (measured_mt_mtask_time, cycle_time, ut))

    predict_mt_efficiency = predict_mt_mtask_time / (
        predict_last_end * nthreads or 1)
    print("\nPrediction (what Verilator used for scheduling):")
    print(" All-thread efficiency = %0.1f%%" %
          (predict_mt_efficiency * 100.0))
    print(" All-thread speedup = %0.1f" %
          (predict_mt_efficiency * nthreads))

    # log(predicted cost / measured time) for every mtask that actually ran
    p2e_ratios = []
    min_p2e = None
    min_mtask = None
    max_p2e = None
    max_mtask = None
    for mtask in sorted(Mtasks.keys()):
        if Mtasks[mtask]['elapsed'] > 0:
            if Mtasks[mtask]['predict_cost'] == 0:
                Mtasks[mtask]['predict_cost'] = 1  # don't log(0) below
            p2e_ratio = math.log(Mtasks[mtask]['predict_cost'] /
                                 Mtasks[mtask]['elapsed'])
            p2e_ratios.append(p2e_ratio)
            if max_mtask is None or p2e_ratio > max_p2e:
                max_p2e = p2e_ratio
                max_mtask = mtask
            if min_mtask is None or p2e_ratio < min_p2e:
                min_p2e = p2e_ratio
                min_mtask = mtask

    # Without any ratio there is no min/max mtask and pstdev()/mean() would
    # raise, so the whole section is skipped.
    if p2e_ratios:
        print("\nStatistics:")
        print(" min log(p2e) = %0.3f" % min_p2e, end="")
        print(" from mtask %d (predict %d," %
              (min_mtask, Mtasks[min_mtask]['predict_cost']),
              end="")
        print(" elapsed %d)" % Mtasks[min_mtask]['elapsed'])
        print(" max log(p2e) = %0.3f" % max_p2e, end="")
        print(" from mtask %d (predict %d," %
              (max_mtask, Mtasks[max_mtask]['predict_cost']),
              end="")
        print(" elapsed %d)" % Mtasks[max_mtask]['elapsed'])

        stddev = statistics.pstdev(p2e_ratios)
        mean = statistics.mean(p2e_ratios)
        print(" mean = %0.3f" % mean)
        print(" stddev = %0.3f" % stddev)
        print(" e ^ stddev = %0.3f" % math.exp(stddev))

    report_cpus()

    if nthreads > ncpus:
        print()
        print("%%Warning: There were fewer CPUs (%d) than threads (%d)." %
              (ncpus, nthreads))
        print(" : See docs on use of numactl.")
    else:
        if 'cpu_socket_cores_warning' in Global:
            print()
            print(
                "%Warning: Multiple threads scheduled on same hyperthreaded core."
            )
            print(" : See docs on use of numactl.")
        if 'cpu_sockets_warning' in Global:
            print()
            print("%Warning: Threads scheduled on multiple sockets.")
            print(" : See docs on use of numactl.")
    print()
def report_cpus():
    """Print one line per CPU that ran an mtask and record placement warnings.

    Reads Global['cpus'] and Global['cpuinfo'].  Rebuilds
    Global['cpu_sockets'] (CPUs used per socket) and
    Global['cpu_socket_cores'] (CPUs used per socket/core pair), then sets
    Global['cpu_sockets_warning'] when more than one socket was used and
    Global['cpu_socket_cores_warning'] when any core hosted more than one
    used CPU.
    """
    print("\nCPUs:")

    Global['cpu_sockets'] = collections.defaultdict(lambda: 0)
    Global['cpu_socket_cores'] = collections.defaultdict(lambda: 0)

    for cpu in sorted(Global['cpus'].keys()):
        print(" cpu %d: " % cpu, end='')
        print("cpu_time=%d" % Global['cpus'][cpu]['cpu_time'], end='')

        if cpu in Global['cpuinfo']:
            cpuinfo = Global['cpuinfo'][cpu]
            # Socket/core placement is only known when both ids were dumped
            if 'physical_id' in cpuinfo and 'core_id' in cpuinfo:
                socket = int(cpuinfo['physical_id'])
                Global['cpu_sockets'][socket] += 1
                print(" socket=%d" % socket, end='')

                core = int(cpuinfo['core_id'])
                Global['cpu_socket_cores'][str(socket) + "__" + str(core)] += 1
                print(" core=%d" % core, end='')

            if 'model_name' in cpuinfo:
                print(" %s" % cpuinfo['model_name'], end='')
        print()

    if len(Global['cpu_sockets']) > 1:
        Global['cpu_sockets_warning'] = True

    # Checked independently of the socket count: two used CPUs can share a
    # core on a single-socket machine as well.
    if any(scn > 1 for scn in Global['cpu_socket_cores'].values()):
        Global['cpu_socket_cores_warning'] = True
######################################################################
2021-09-08 12:16:31 +00:00
def write_vcd(filename):
    """Write the loaded profile to *filename* as a VCD waveform.

    Signals are grouped into the modules measured_threads, cpus, mtasks,
    evals, predicted_threads and Stats.  Reads Threads, Mtasks, Evals,
    EvalLoops and Global['predict_last_end'].  The predicted graph is left
    out when no positive predict_last_end is available, because the
    predicted schedule cannot be scaled onto the eval loops then.  All
    tables are built before the output file is opened.
    """
    print("Writing %s" % filename)

    # {<module>}{<sig>} = code
    sigs = {
        'predicted_threads': {},
        'measured_threads': {},
        'cpus': {},
        'evals': {},
        'mtasks': {},
        'Stats': {}
    }
    # {<time>}{<code>} = value; None is written as high-impedance
    values = collections.defaultdict(dict)
    # {<time>} = change in the number of running mtasks at that time
    parallelism = {
        'measured': collections.defaultdict(int),
        'predicted': collections.defaultdict(int)
    }
    parallelism['measured'][0] = 0
    parallelism['predicted'][0] = 0

    next_code = 0

    def sig_code(module, sig):
        """Return the code of module.sig, allocating the next one if new."""
        nonlocal next_code
        if sig not in sigs[module]:
            sigs[module][sig] = next_code
            next_code += 1
        return sigs[module][sig]

    def mark(code, start, end, value):
        """Drive signal *code* with *value* at *start* and release at *end*."""
        values[start][code] = value
        values[end][code] = None

    # Measured graph
    for thread in sorted(Threads.keys()):
        mcode = sig_code('measured_threads', "thread%d_mtask" % thread)
        for start in sorted(Threads[thread]):
            record = Threads[thread][start]
            mtask = record['mtask']
            end = record['end']
            cpu = record['cpu']

            mark(mcode, start, end, mtask)
            parallelism['measured'][start] += 1
            parallelism['measured'][end] -= 1

            mark(sig_code('cpus', "cpu%d_thread" % cpu), start, end, thread)
            mark(sig_code('mtasks', "mtask%d_cpu" % mtask), start, end, cpu)

    # Eval graph: value is the running count of evals
    elcode = sig_code('evals', "eval")
    for n, eval_start in enumerate(Evals, 1):
        mark(elcode, eval_start, Evals[eval_start]['end'], n)

    # Eval_loop graph: value is the running count of eval loops
    elcode = sig_code('evals', "eval_loop")
    for n, eval_start in enumerate(EvalLoops, 1):
        mark(elcode, eval_start, EvalLoops[eval_start]['end'], n)

    # Predicted graph
    predict_last_end = Global.get('predict_last_end', 0)
    if predict_last_end > 0:
        for eval_start in EvalLoops:
            eval_end = EvalLoops[eval_start]['end']
            # Compute scale so predicted graph is of same width as eval
            measured_scaling = (eval_end - eval_start) / predict_last_end
            # Predict mtasks that fill the time the eval occupied
            for mtask in Mtasks:
                thread = Mtasks[mtask]['thread']
                pred_scaled_start = eval_start + int(
                    Mtasks[mtask]['predict_start'] * measured_scaling)
                pred_scaled_end = eval_start + int(
                    (Mtasks[mtask]['predict_start'] +
                     Mtasks[mtask]['predict_cost']) * measured_scaling)
                if pred_scaled_start == pred_scaled_end:
                    continue

                mcode = sig_code('predicted_threads',
                                 "predicted_thread%d_mtask" % thread)
                mark(mcode, pred_scaled_start, pred_scaled_end, mtask)
                parallelism['predicted'][pred_scaled_start] += 1
                parallelism['predicted'][pred_scaled_end] -= 1

    # Parallelism graph: running sum of the per-time deltas
    for measpred in ('measured', 'predicted'):
        pcode = sig_code('Stats', "%s_parallelism" % measpred)
        value = 0
        for time in sorted(parallelism[measpred].keys()):
            value += parallelism[measpred][time]
            values[time][pcode] = value

    # Start with Z for any signals without data at the first timestamp.
    # values always holds time 0 because parallelism was seeded with it.
    first_time = min(values)
    for code in range(next_code):
        values[first_time].setdefault(code, None)

    # Create output file
    with open(filename, "w") as fh:
        fh.write("$version Generated by verilator_gantt $end\n")
        fh.write("$timescale 1ns $end\n")
        fh.write("\n")

        fh.write(" $scope module gantt $end\n")
        for module in sorted(sigs.keys()):
            fh.write(" $scope module %s $end\n" % module)
            for sig in sorted(sigs[module].keys()):
                fh.write(" $var wire 32 v%x %s [31:0] $end\n" %
                         (sigs[module][sig], sig))
            fh.write(" $upscope $end\n")
        fh.write(" $upscope $end\n")
        fh.write("$enddefinitions $end\n")
        fh.write("\n")

        for time in sorted(values):
            fh.write("#%d\n" % time)
            for code in sorted(values[time].keys()):
                value = values[time][code]
                if value is None:
                    fh.write("bz v%x\n" % code)
                else:
                    fh.write("b%s v%x\n" % (format(value, 'b'), code))
2021-09-08 12:16:31 +00:00
######################################################################
2021-09-08 12:16:31 +00:00
# Command line handling and script entry.  Args stays a module-level name
# because read_data() consults Args.debug.
parser = argparse.ArgumentParser(
    allow_abbrev=False,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="""Create Gantt chart of multi-threaded execution""",
    epilog=
    """Verilator_gantt creates a visual representation to help analyze Verilator
multithreaded simulation performance, by showing when each macro-task
starts and ends, and showing when each thread is busy or idle.

For documentation see
https://verilator.org/guide/latest/exe_verilator_gantt.html

Copyright 2018-2021 by Wilson Snyder. This program is free software; you
can redistribute it and/or modify it under the terms of either the GNU
Lesser General Public License Version 3 or the Perl Artistic License
Version 2.0.

SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0""")

parser.add_argument('--debug', action='store_true', help='enable debug')
parser.add_argument('--no-vcd',
                    help='disable creating vcd',
                    action='store_true')
parser.add_argument('--vcd',
                    help='filename for vcd output',
                    default='profile_threads.vcd')
# nargs='?' makes the positional optional; without it argparse always
# requires the argument and the default below can never take effect.
parser.add_argument('filename',
                    nargs='?',
                    help='input profile_threads.dat filename to process',
                    default='profile_threads.dat')

Args = parser.parse_args()

process(Args.filename)
if not Args.no_vcd:
    write_vcd(Args.vcd)
######################################################################
2021-09-08 12:16:31 +00:00
# Local Variables:
# compile-command: "./verilator_gantt ../test_regress/t/t_gantt_io.dat"
# End: