#!/usr/bin/env python3 # pylint: disable=C0103,C0114,C0116,C0301,R0914,R0912,R0915,W0511,eval-used ###################################################################### import argparse import collections import math import re import statistics # from pprint import pprint Threads = collections.defaultdict(lambda: collections.defaultdict(lambda: {})) Mtasks = collections.defaultdict(lambda: {}) Evals = collections.defaultdict(lambda: {}) EvalLoops = collections.defaultdict(lambda: {}) Global = { 'args': {}, 'cpuinfo': collections.defaultdict(lambda: {}), 'rdtsc_cycle_time': 0, 'stats': {} } ###################################################################### def process(filename): read_data(filename) report() def read_data(filename): with open(filename) as fh: re_thread = re.compile(r'^VLPROFTHREAD (\d+)$') re_record = re.compile(r'^VLPROFEXEC (\S+) (\d+)(.*)$') re_payload_mtaskBegin = re.compile( r'id (\d+) predictStart (\d+) cpu (\d+)') re_payload_mtaskEnd = re.compile(r'id (\d+) predictCost (\d+)') re_arg1 = re.compile(r'VLPROF arg\s+(\S+)\+([0-9.]*)\s*') re_arg2 = re.compile(r'VLPROF arg\s+(\S+)\s+([0-9.]*)\s*$') re_stat = re.compile(r'VLPROF stat\s+(\S+)\s+([0-9.]+)') re_time = re.compile(r'rdtsc time = (\d+) ticks') re_proc_cpu = re.compile(r'VLPROFPROC processor\s*:\s*(\d+)\s*$') re_proc_dat = re.compile(r'VLPROFPROC ([a-z_ ]+)\s*:\s*(.*)$') cpu = None thread = None lastEvalBeginTick = None lastEvalLoopBeginTick = None for line in fh: recordMatch = re_record.match(line) if recordMatch: kind, tick, payload = recordMatch.groups() tick = int(tick) payload = payload.strip() if kind == "EVAL_BEGIN": Evals[tick]['start'] = tick lastEvalBeginTick = tick elif kind == "EVAL_END": Evals[lastEvalBeginTick]['end'] = tick lastEvalBeginTick = None elif kind == "EVAL_LOOP_BEGIN": EvalLoops[tick]['start'] = tick lastEvalLoopBeginTick = tick elif kind == "EVAL_LOOP_END": EvalLoops[lastEvalLoopBeginTick]['end'] = tick lastEvalLoopBeginTick = None elif kind == "MTASK_BEGIN": mtask, predict_start, ecpu = re_payload_mtaskBegin.match( payload).groups() mtask = int(mtask) predict_start = int(predict_start) ecpu = int(ecpu) Threads[thread][tick]['mtask'] = mtask Threads[thread][tick]['predict_start'] = predict_start Threads[thread][tick]['cpu'] = ecpu if 'elapsed' not in Mtasks[mtask]: Mtasks[mtask] = {'end': 0, 'elapsed': 0} Mtasks[mtask]['begin'] = tick Mtasks[mtask]['thread'] = thread Mtasks[mtask]['predict_start'] = predict_start elif kind == "MTASK_END": mtask, predict_cost = re_payload_mtaskEnd.match( payload).groups() mtask = int(mtask) predict_cost = int(predict_cost) begin = Mtasks[mtask]['begin'] Threads[thread][begin]['end'] = tick Threads[thread][begin]['predict_cost'] = predict_cost Mtasks[mtask]['elapsed'] += tick - begin Mtasks[mtask]['predict_cost'] = predict_cost Mtasks[mtask]['end'] = max(Mtasks[mtask]['end'], tick) elif Args.debug: print("-Unknown execution trace record: %s" % line) elif re_thread.match(line): thread = int(re_thread.match(line).group(1)) elif re.match(r'^VLPROF(THREAD|VERSION)', line): pass elif re_arg1.match(line): match = re_arg1.match(line) Global['args'][match.group(1)] = match.group(2) elif re_arg2.match(line): match = re_arg2.match(line) Global['args'][match.group(1)] = match.group(2) elif re_stat.match(line): match = re_stat.match(line) Global['stats'][match.group(1)] = match.group(2) elif re_proc_cpu.match(line): match = re_proc_cpu.match(line) cpu = int(match.group(1)) elif cpu and re_proc_dat.match(line): match = re_proc_dat.match(line) term = match.group(1) value = match.group(2) term = re.sub(r'\s+$', '', term) term = re.sub(r'\s+', '_', term) value = re.sub(r'\s+$', '', value) Global['cpuinfo'][cpu][term] = value elif re.match(r'^#', line): pass elif Args.debug: print("-Unk: %s" % line) # TODO -- this is parsing text printed by a client. # Really, verilator proper should generate this # if it's useful... if re_time.match(line): Global['rdtsc_cycle_time'] = re_time.group(1) def re_match_result(regexp, line, result_to): result_to = re.match(regexp, line) return result_to ###################################################################### def report(): print("Verilator Gantt report") print("\nArgument settings:") for arg in sorted(Global['args'].keys()): plus = "+" if re.match(r'^\+', arg) else " " print(" %s%s%s" % (arg, plus, Global['args'][arg])) nthreads = int(Global['stats']['threads']) Global['cpus'] = {} for thread in Threads: # Make potentially multiple characters per column for start in Threads[thread]: if not Threads[thread][start]: continue cpu = Threads[thread][start]['cpu'] elapsed = Threads[thread][start]['end'] - start if cpu not in Global['cpus']: Global['cpus'][cpu] = {'cpu_time': 0} Global['cpus'][cpu]['cpu_time'] += elapsed measured_mt_mtask_time = 0 predict_mt_mtask_time = 0 long_mtask_time = 0 measured_last_end = 0 predict_last_end = 0 for mtask in Mtasks: measured_mt_mtask_time += Mtasks[mtask]['elapsed'] predict_mt_mtask_time += Mtasks[mtask]['predict_cost'] measured_last_end = max(measured_last_end, Mtasks[mtask]['end']) predict_last_end = max( predict_last_end, Mtasks[mtask]['predict_start'] + Mtasks[mtask]['predict_cost']) long_mtask_time = max(long_mtask_time, Mtasks[mtask]['elapsed']) Global['measured_last_end'] = measured_last_end Global['predict_last_end'] = predict_last_end # If we know cycle time in the same (rdtsc) units, # this will give us an actual utilization number, # (how effectively we keep the cores busy.) # # It also gives us a number we can compare against # serial mode, to estimate the overhead of data sharing, # which will show up in the total elapsed time. (Overhead # of synchronization and scheduling should not.) print("\nAnalysis:") print(" Total threads = %d" % nthreads) print(" Total mtasks = %d" % len(Mtasks)) ncpus = max(len(Global['cpus']), 1) print(" Total cpus used = %d" % ncpus) print(" Total yields = %d" % int(Global['stats'].get('yields', 0))) print(" Total evals = %d" % len(Evals)) print(" Total eval loops = %d" % len(EvalLoops)) if Mtasks: print(" Total eval time = %d rdtsc ticks" % Global['measured_last_end']) print(" Longest mtask time = %d rdtsc ticks" % long_mtask_time) print(" All-thread mtask time = %d rdtsc ticks" % measured_mt_mtask_time) long_efficiency = long_mtask_time / (Global.get( 'measured_last_end', 1) or 1) print(" Longest-thread efficiency = %0.1f%%" % (long_efficiency * 100.0)) mt_efficiency = measured_mt_mtask_time / ( Global.get('measured_last_end', 1) * nthreads or 1) print(" All-thread efficiency = %0.1f%%" % (mt_efficiency * 100.0)) print(" All-thread speedup = %0.1f" % (mt_efficiency * nthreads)) if Global['rdtsc_cycle_time'] > 0: ut = measured_mt_mtask_time / Global['rdtsc_cycle_time'] print("tot_mtask_cpu=" + measured_mt_mtask_time + " cyc=" + Global['rdtsc_cycle_time'] + " ut=" + ut) predict_mt_efficiency = predict_mt_mtask_time / ( Global.get('predict_last_end', 1) * nthreads or 1) print("\nPrediction (what Verilator used for scheduling):") print(" All-thread efficiency = %0.1f%%" % (predict_mt_efficiency * 100.0)) print(" All-thread speedup = %0.1f" % (predict_mt_efficiency * nthreads)) p2e_ratios = [] min_p2e = 1000000 min_mtask = None max_p2e = -1000000 max_mtask = None for mtask in sorted(Mtasks.keys()): if Mtasks[mtask]['elapsed'] > 0: if Mtasks[mtask]['predict_cost'] == 0: Mtasks[mtask]['predict_cost'] = 1 # don't log(0) below p2e_ratio = math.log(Mtasks[mtask]['predict_cost'] / Mtasks[mtask]['elapsed']) p2e_ratios.append(p2e_ratio) if p2e_ratio > max_p2e: max_p2e = p2e_ratio max_mtask = mtask if p2e_ratio < min_p2e: min_p2e = p2e_ratio min_mtask = mtask print("\nMTask statistics:") print(" min log(p2e) = %0.3f" % min_p2e, end="") print(" from mtask %d (predict %d," % (min_mtask, Mtasks[min_mtask]['predict_cost']), end="") print(" elapsed %d)" % Mtasks[min_mtask]['elapsed']) print(" max log(p2e) = %0.3f" % max_p2e, end="") print(" from mtask %d (predict %d," % (max_mtask, Mtasks[max_mtask]['predict_cost']), end="") print(" elapsed %d)" % Mtasks[max_mtask]['elapsed']) stddev = statistics.pstdev(p2e_ratios) mean = statistics.mean(p2e_ratios) print(" mean = %0.3f" % mean) print(" stddev = %0.3f" % stddev) print(" e ^ stddev = %0.3f" % math.exp(stddev)) report_cpus() if nthreads > ncpus: print() print("%%Warning: There were fewer CPUs (%d) then threads (%d)." % (ncpus, nthreads)) print(" : See docs on use of numactl.") else: if 'cpu_socket_cores_warning' in Global: print() print( "%Warning: Multiple threads scheduled on same hyperthreaded core." ) print(" : See docs on use of numactl.") if 'cpu_sockets_warning' in Global: print() print("%Warning: Threads scheduled on multiple sockets.") print(" : See docs on use of numactl.") print() def report_cpus(): print("\nCPUs:") Global['cpu_sockets'] = collections.defaultdict(lambda: 0) Global['cpu_socket_cores'] = collections.defaultdict(lambda: 0) for cpu in sorted(Global['cpus'].keys()): print(" cpu %d: " % cpu, end='') print("cpu_time=%d" % Global['cpus'][cpu]['cpu_time'], end='') socket = None if cpu in Global['cpuinfo']: cpuinfo = Global['cpuinfo'][cpu] if 'physical_id' in cpuinfo and 'core_id' in cpuinfo: socket = int(cpuinfo['physical_id']) Global['cpu_sockets'][socket] += 1 print(" socket=%d" % socket, end='') core = int(cpuinfo['core_id']) Global['cpu_socket_cores'][str(socket) + "__" + str(core)] += 1 print(" core=%d" % core, end='') if 'model_name' in cpuinfo: model = cpuinfo['model_name'] print(" %s" % model, end='') print() if len(Global['cpu_sockets']) > 1: Global['cpu_sockets_warning'] = True for scn in Global['cpu_socket_cores'].values(): if scn > 1: Global['cpu_socket_cores_warning'] = True ###################################################################### def write_vcd(filename): print("Writing %s" % filename) with open(filename, "w") as fh: vcd = { 'values': collections.defaultdict(lambda: {}), # {