Fix verilator_gantt for hierarchically Verilated models (#5700)

This commit is contained in:
Bartłomiej Chmiel 2024-12-23 16:10:46 +01:00 committed by GitHub
parent 530ebecfb7
commit 72a47e16c1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 81 additions and 18 deletions

View File

@ -36,7 +36,7 @@ def read_data(filename):
re_proc_cpu = re.compile(r'VLPROFPROC processor\s*:\s*(\d+)\s*$') re_proc_cpu = re.compile(r'VLPROFPROC processor\s*:\s*(\d+)\s*$')
re_proc_dat = re.compile(r'VLPROFPROC ([a-z_ ]+)\s*:\s*(.*)$') re_proc_dat = re.compile(r'VLPROFPROC ([a-z_ ]+)\s*:\s*(.*)$')
cpu = None cpu = None
thread = None thread = 0
execGraphStart = None execGraphStart = None
global LongestVcdStrValueLength global LongestVcdStrValueLength
@ -54,11 +54,11 @@ def read_data(filename):
if kind == "SECTION_PUSH": if kind == "SECTION_PUSH":
LongestVcdStrValueLength = max(LongestVcdStrValueLength, len(payload)) LongestVcdStrValueLength = max(LongestVcdStrValueLength, len(payload))
SectionStack.append(payload) SectionStack.append(payload)
Sections.append((tick, tuple(SectionStack))) Sections[thread].append((tick, tuple(SectionStack)))
elif kind == "SECTION_POP": elif kind == "SECTION_POP":
assert SectionStack, "SECTION_POP without SECTION_PUSH" assert SectionStack, "SECTION_POP without SECTION_PUSH"
SectionStack.pop() SectionStack.pop()
Sections.append((tick, tuple(SectionStack))) Sections[thread].append((tick, tuple(SectionStack)))
elif kind == "MTASK_BEGIN": elif kind == "MTASK_BEGIN":
mtask, predict_start, ecpu = re_payload_mtaskBegin.match(payload).groups() mtask, predict_start, ecpu = re_payload_mtaskBegin.match(payload).groups()
mtask = int(mtask) mtask = int(mtask)
@ -97,6 +97,7 @@ def read_data(filename):
print("-Unknown execution trace record: %s" % line) print("-Unknown execution trace record: %s" % line)
elif re_thread.match(line): elif re_thread.match(line):
thread = int(re_thread.match(line).group(1)) thread = int(re_thread.match(line).group(1))
Sections.append([])
elif re.match(r'^VLPROF(THREAD|VERSION)', line): elif re.match(r'^VLPROF(THREAD|VERSION)', line):
pass pass
elif re_arg1.match(line): elif re_arg1.match(line):
@ -307,23 +308,27 @@ def report_cpus():
def report_sections(): def report_sections():
if not Sections: for thread, section in enumerate(Sections):
return if section:
print("\nSection profile:") print(f"\nSection profile for thread {thread}:")
report_section(section)
def report_section(section):
totalTime = collections.defaultdict(lambda: 0) totalTime = collections.defaultdict(lambda: 0)
selfTime = collections.defaultdict(lambda: 0) selfTime = collections.defaultdict(lambda: 0)
sectionTree = [0, {}, 1] # [selfTime, childTrees, numberOfTimesEntered] sectionTree = [0, {}, 1] # [selfTime, childTrees, numberOfTimesEntered]
prevTime = 0 prevTime = 0
prevStack = () prevStack = ()
for time, stack in Sections: for time, stack in section:
if len(stack) > len(prevStack): if len(stack) > len(prevStack):
scope = sectionTree scope = sectionTree
for item in stack: for item in stack:
scope = scope[1].setdefault(item, [0, {}, 0]) scope = scope[1].setdefault(item, [0, {}, 0])
scope[2] += 1 scope[2] += 1
dt = time - prevTime dt = time - prevTime
assert dt >= 0
scope = sectionTree scope = sectionTree
for item in prevStack: for item in prevStack:
scope = scope[1].setdefault(item, [0, {}, 0]) scope = scope[1].setdefault(item, [0, {}, 0])
@ -457,10 +462,11 @@ def write_vcd(filename):
addValue(pcode, time, value) addValue(pcode, time, value)
# Section graph # Section graph
if Sections: for thread, section in enumerate(Sections):
scode = getCode(LongestVcdStrValueLength * 8, "section", "trace") if section:
dcode = getCode(32, "section", "depth") scode = getCode(LongestVcdStrValueLength * 8, "section", f"t{thread}_trace")
for time, stack in Sections: dcode = getCode(32, "section", f"t{thread}_depth")
for time, stack in section:
addValue(scode, time, stack[-1] if stack else None) addValue(scode, time, stack[-1] if stack else None)
addValue(dcode, time, len(stack)) addValue(dcode, time, len(stack))

View File

@ -432,7 +432,8 @@ class EmitCModel final : public EmitCFunc {
puts(topModNameProtected + "__" + protect("_eval_settle") + "(&(vlSymsp->TOP));\n"); puts(topModNameProtected + "__" + protect("_eval_settle") + "(&(vlSymsp->TOP));\n");
puts("}\n"); puts("}\n");
if (v3Global.opt.profExec()) puts("vlSymsp->__Vm_executionProfilerp->configure();\n"); if (v3Global.opt.profExec() && !v3Global.opt.hierChild())
puts("vlSymsp->__Vm_executionProfilerp->configure();\n");
puts("VL_DEBUG_IF(VL_DBG_MSGF(\"+ Eval\\n\"););\n"); puts("VL_DEBUG_IF(VL_DBG_MSGF(\"+ Eval\\n\"););\n");
puts(topModNameProtected + "__" + protect("_eval") + "(&(vlSymsp->TOP));\n"); puts(topModNameProtected + "__" + protect("_eval") + "(&(vlSymsp->TOP));\n");

View File

@ -123,8 +123,10 @@ AstCFunc* V3Order::order(AstNetlist* netlistp, //
}(); }();
if (v3Global.opt.profExec()) { if (v3Global.opt.profExec()) {
funcp->addStmtsp(new AstCStmt{flp, "VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\"func " const string name
+ tag + "\");\n"}); = (v3Global.opt.hierChild() ? (v3Global.opt.topModule() + " ") : "") + "func " + tag;
funcp->addStmtsp(new AstCStmt{flp, "VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\""
+ name + "\");\n"});
} }
// Build the OrderGraph // Build the OrderGraph

View File

@ -156,7 +156,9 @@ AstNodeStmt* checkIterationLimit(AstNetlist* netlistp, const string& name, AstVa
return ifp; return ifp;
} }
AstNodeStmt* profExecSectionPush(FileLine* flp, const string& name) { AstNodeStmt* profExecSectionPush(FileLine* flp, const string& section) {
const string name
= (v3Global.opt.hierChild() ? (v3Global.opt.topModule() + " ") : "") + section;
return new AstCStmt{flp, "VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\"" + name + "\");\n"}; return new AstCStmt{flp, "VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\"" + name + "\");\n"};
} }

52
test_regress/t/t_gantt_hier.py Executable file
View File

@ -0,0 +1,52 @@
#!/usr/bin/env python3
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
#
# Copyright 2024 by Wilson Snyder. This program is free software; you
# can redistribute it and/or modify it under the terms of either the GNU
# Lesser General Public License Version 3 or the Perl Artistic License
# Version 2.0.
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
# Test for bin/verilator_gantt on a hierarchically Verilated model
import vltest_bootstrap
test.scenarios('vlt_all')
test.top_filename = "t/t_gen_alw.v"  # Any, as long as runs a few cycles

# Build with execution profiling enabled on a hierarchical model.
# The checks further down depend on the thread count, so multithreaded
# scenarios use 2 (minimum reasonable) and everything else uses 1.
num_threads = 2 if test.vltmt else 1
test.compile(v_flags2=["--prof-exec", "--hierarchical"], threads=num_threads)

# Run the model with the profiler's start/window arguments and direct both
# profile outputs into the test's object directory.
run_flags = [
    "+verilator+prof+exec+start+2",
    " +verilator+prof+exec+window+2",
    " +verilator+prof+exec+file+" + test.obj_dir + "/profile_exec.dat",
    " +verilator+prof+vlt+file+" + test.obj_dir + "/profile.vlt",
]  # yapf:disable
test.execute(all_run_flags=run_flags)

# Post-process the profile with verilator_gantt: the data file is passed as
# an argument, a VCD is requested via --vcd, and the textual report is
# captured in gantt.log through tee so it can be grepped below.
gantt_log = test.obj_dir + "/gantt.log"
vcd_file = test.obj_dir + "/profile_exec.vcd"
gantt_cmd = [
    os.environ["VERILATOR_ROOT"] + "/bin/verilator_gantt",
    test.obj_dir + "/profile_exec.dat",
    "--vcd " + vcd_file,
    "| tee " + gantt_log,
]
test.run(cmd=gantt_cmd)

if test.vltmt:
    test.file_grep(gantt_log, r'Total threads += 2')
    test.file_grep(gantt_log, r'Total mtasks += 8')
    # Predicted thread utilization should be less than 100%
    test.file_grep_not(gantt_log, r'Thread utilization =\s*\d\d\d+\.\d+%')
else:
    test.file_grep(gantt_log, r'Total threads += 1')
    test.file_grep(gantt_log, r'Total mtasks += 0')

# Expect a report table row whose columns read 2, 2.0..., and a name
# beginning with "eval".
test.file_grep(gantt_log, r'\|\s+2\s+\|\s+2\.0+\s+\|\s+eval')

# Diff to itself, just to check parsing
test.vcd_identical(vcd_file, vcd_file)

test.passes()

View File

@ -59,7 +59,7 @@ endmodule
module Test (/*AUTOARG*/ module Test (/*AUTOARG*/
// Inputs // Inputs
clk, in clk, in
); ); /*verilator hier_block*/
input clk; input clk;
input [9:0] in; input [9:0] in;