mirror of
https://github.com/verilator/verilator.git
synced 2024-12-29 10:47:34 +00:00
Fix verilator_gantt for hierarchically Verilated models (#5700)
This commit is contained in:
parent
530ebecfb7
commit
72a47e16c1
@ -36,7 +36,7 @@ def read_data(filename):
|
|||||||
re_proc_cpu = re.compile(r'VLPROFPROC processor\s*:\s*(\d+)\s*$')
|
re_proc_cpu = re.compile(r'VLPROFPROC processor\s*:\s*(\d+)\s*$')
|
||||||
re_proc_dat = re.compile(r'VLPROFPROC ([a-z_ ]+)\s*:\s*(.*)$')
|
re_proc_dat = re.compile(r'VLPROFPROC ([a-z_ ]+)\s*:\s*(.*)$')
|
||||||
cpu = None
|
cpu = None
|
||||||
thread = None
|
thread = 0
|
||||||
execGraphStart = None
|
execGraphStart = None
|
||||||
|
|
||||||
global LongestVcdStrValueLength
|
global LongestVcdStrValueLength
|
||||||
@ -54,11 +54,11 @@ def read_data(filename):
|
|||||||
if kind == "SECTION_PUSH":
|
if kind == "SECTION_PUSH":
|
||||||
LongestVcdStrValueLength = max(LongestVcdStrValueLength, len(payload))
|
LongestVcdStrValueLength = max(LongestVcdStrValueLength, len(payload))
|
||||||
SectionStack.append(payload)
|
SectionStack.append(payload)
|
||||||
Sections.append((tick, tuple(SectionStack)))
|
Sections[thread].append((tick, tuple(SectionStack)))
|
||||||
elif kind == "SECTION_POP":
|
elif kind == "SECTION_POP":
|
||||||
assert SectionStack, "SECTION_POP without SECTION_PUSH"
|
assert SectionStack, "SECTION_POP without SECTION_PUSH"
|
||||||
SectionStack.pop()
|
SectionStack.pop()
|
||||||
Sections.append((tick, tuple(SectionStack)))
|
Sections[thread].append((tick, tuple(SectionStack)))
|
||||||
elif kind == "MTASK_BEGIN":
|
elif kind == "MTASK_BEGIN":
|
||||||
mtask, predict_start, ecpu = re_payload_mtaskBegin.match(payload).groups()
|
mtask, predict_start, ecpu = re_payload_mtaskBegin.match(payload).groups()
|
||||||
mtask = int(mtask)
|
mtask = int(mtask)
|
||||||
@ -97,6 +97,7 @@ def read_data(filename):
|
|||||||
print("-Unknown execution trace record: %s" % line)
|
print("-Unknown execution trace record: %s" % line)
|
||||||
elif re_thread.match(line):
|
elif re_thread.match(line):
|
||||||
thread = int(re_thread.match(line).group(1))
|
thread = int(re_thread.match(line).group(1))
|
||||||
|
Sections.append([])
|
||||||
elif re.match(r'^VLPROF(THREAD|VERSION)', line):
|
elif re.match(r'^VLPROF(THREAD|VERSION)', line):
|
||||||
pass
|
pass
|
||||||
elif re_arg1.match(line):
|
elif re_arg1.match(line):
|
||||||
@ -307,23 +308,27 @@ def report_cpus():
|
|||||||
|
|
||||||
|
|
||||||
def report_sections():
|
def report_sections():
|
||||||
if not Sections:
|
for thread, section in enumerate(Sections):
|
||||||
return
|
if section:
|
||||||
print("\nSection profile:")
|
print(f"\nSection profile for thread {thread}:")
|
||||||
|
report_section(section)
|
||||||
|
|
||||||
|
|
||||||
|
def report_section(section):
|
||||||
totalTime = collections.defaultdict(lambda: 0)
|
totalTime = collections.defaultdict(lambda: 0)
|
||||||
selfTime = collections.defaultdict(lambda: 0)
|
selfTime = collections.defaultdict(lambda: 0)
|
||||||
|
|
||||||
sectionTree = [0, {}, 1] # [selfTime, childTrees, numberOfTimesEntered]
|
sectionTree = [0, {}, 1] # [selfTime, childTrees, numberOfTimesEntered]
|
||||||
prevTime = 0
|
prevTime = 0
|
||||||
prevStack = ()
|
prevStack = ()
|
||||||
for time, stack in Sections:
|
for time, stack in section:
|
||||||
if len(stack) > len(prevStack):
|
if len(stack) > len(prevStack):
|
||||||
scope = sectionTree
|
scope = sectionTree
|
||||||
for item in stack:
|
for item in stack:
|
||||||
scope = scope[1].setdefault(item, [0, {}, 0])
|
scope = scope[1].setdefault(item, [0, {}, 0])
|
||||||
scope[2] += 1
|
scope[2] += 1
|
||||||
dt = time - prevTime
|
dt = time - prevTime
|
||||||
|
assert dt >= 0
|
||||||
scope = sectionTree
|
scope = sectionTree
|
||||||
for item in prevStack:
|
for item in prevStack:
|
||||||
scope = scope[1].setdefault(item, [0, {}, 0])
|
scope = scope[1].setdefault(item, [0, {}, 0])
|
||||||
@ -457,12 +462,13 @@ def write_vcd(filename):
|
|||||||
addValue(pcode, time, value)
|
addValue(pcode, time, value)
|
||||||
|
|
||||||
# Section graph
|
# Section graph
|
||||||
if Sections:
|
for thread, section in enumerate(Sections):
|
||||||
scode = getCode(LongestVcdStrValueLength * 8, "section", "trace")
|
if section:
|
||||||
dcode = getCode(32, "section", "depth")
|
scode = getCode(LongestVcdStrValueLength * 8, "section", f"t{thread}_trace")
|
||||||
for time, stack in Sections:
|
dcode = getCode(32, "section", f"t{thread}_depth")
|
||||||
addValue(scode, time, stack[-1] if stack else None)
|
for time, stack in section:
|
||||||
addValue(dcode, time, len(stack))
|
addValue(scode, time, stack[-1] if stack else None)
|
||||||
|
addValue(dcode, time, len(stack))
|
||||||
|
|
||||||
# Create output file
|
# Create output file
|
||||||
fh.write("$version Generated by verilator_gantt $end\n")
|
fh.write("$version Generated by verilator_gantt $end\n")
|
||||||
|
@ -432,7 +432,8 @@ class EmitCModel final : public EmitCFunc {
|
|||||||
puts(topModNameProtected + "__" + protect("_eval_settle") + "(&(vlSymsp->TOP));\n");
|
puts(topModNameProtected + "__" + protect("_eval_settle") + "(&(vlSymsp->TOP));\n");
|
||||||
puts("}\n");
|
puts("}\n");
|
||||||
|
|
||||||
if (v3Global.opt.profExec()) puts("vlSymsp->__Vm_executionProfilerp->configure();\n");
|
if (v3Global.opt.profExec() && !v3Global.opt.hierChild())
|
||||||
|
puts("vlSymsp->__Vm_executionProfilerp->configure();\n");
|
||||||
|
|
||||||
puts("VL_DEBUG_IF(VL_DBG_MSGF(\"+ Eval\\n\"););\n");
|
puts("VL_DEBUG_IF(VL_DBG_MSGF(\"+ Eval\\n\"););\n");
|
||||||
puts(topModNameProtected + "__" + protect("_eval") + "(&(vlSymsp->TOP));\n");
|
puts(topModNameProtected + "__" + protect("_eval") + "(&(vlSymsp->TOP));\n");
|
||||||
|
@ -123,8 +123,10 @@ AstCFunc* V3Order::order(AstNetlist* netlistp, //
|
|||||||
}();
|
}();
|
||||||
|
|
||||||
if (v3Global.opt.profExec()) {
|
if (v3Global.opt.profExec()) {
|
||||||
funcp->addStmtsp(new AstCStmt{flp, "VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\"func "
|
const string name
|
||||||
+ tag + "\");\n"});
|
= (v3Global.opt.hierChild() ? (v3Global.opt.topModule() + " ") : "") + "func " + tag;
|
||||||
|
funcp->addStmtsp(new AstCStmt{flp, "VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\""
|
||||||
|
+ name + "\");\n"});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build the OrderGraph
|
// Build the OrderGraph
|
||||||
|
@ -156,7 +156,9 @@ AstNodeStmt* checkIterationLimit(AstNetlist* netlistp, const string& name, AstVa
|
|||||||
return ifp;
|
return ifp;
|
||||||
}
|
}
|
||||||
|
|
||||||
AstNodeStmt* profExecSectionPush(FileLine* flp, const string& name) {
|
AstNodeStmt* profExecSectionPush(FileLine* flp, const string& section) {
|
||||||
|
const string name
|
||||||
|
= (v3Global.opt.hierChild() ? (v3Global.opt.topModule() + " ") : "") + section;
|
||||||
return new AstCStmt{flp, "VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\"" + name + "\");\n"};
|
return new AstCStmt{flp, "VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\"" + name + "\");\n"};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
52
test_regress/t/t_gantt_hier.py
Executable file
52
test_regress/t/t_gantt_hier.py
Executable file
@ -0,0 +1,52 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||||
|
#
|
||||||
|
# Copyright 2024 by Wilson Snyder. This program is free software; you
|
||||||
|
# can redistribute it and/or modify it under the terms of either the GNU
|
||||||
|
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||||
|
# Version 2.0.
|
||||||
|
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||||
|
|
||||||
|
# Test for bin/verilator_gantt,
|
||||||
|
|
||||||
|
import vltest_bootstrap
|
||||||
|
|
||||||
|
test.scenarios('vlt_all')
|
||||||
|
test.top_filename = "t/t_gen_alw.v" # Any, as long as runs a few cycles
|
||||||
|
|
||||||
|
test.compile(
|
||||||
|
v_flags2=["--prof-exec", "--hierarchical"],
|
||||||
|
# Checks below care about thread count, so use 2 (minimum reasonable)
|
||||||
|
threads=(2 if test.vltmt else 1))
|
||||||
|
|
||||||
|
test.execute(all_run_flags=[
|
||||||
|
"+verilator+prof+exec+start+2",
|
||||||
|
" +verilator+prof+exec+window+2",
|
||||||
|
" +verilator+prof+exec+file+" + test.obj_dir + "/profile_exec.dat",
|
||||||
|
" +verilator+prof+vlt+file+" + test.obj_dir + "/profile.vlt"]) # yapf:disable
|
||||||
|
|
||||||
|
# For now, verilator_gantt still reads from STDIN
|
||||||
|
# (probably it should take a file, gantt.dat like verilator_profcfunc)
|
||||||
|
# The profiling data still goes direct to the runtime's STDOUT
|
||||||
|
# (maybe that should go to a separate file - gantt.dat?)
|
||||||
|
test.run(cmd=[
|
||||||
|
os.environ["VERILATOR_ROOT"] + "/bin/verilator_gantt", test.obj_dir +
|
||||||
|
"/profile_exec.dat", "--vcd " + test.obj_dir + "/profile_exec.vcd", "| tee " + test.obj_dir +
|
||||||
|
"/gantt.log"
|
||||||
|
])
|
||||||
|
|
||||||
|
if test.vltmt:
|
||||||
|
test.file_grep(test.obj_dir + "/gantt.log", r'Total threads += 2')
|
||||||
|
test.file_grep(test.obj_dir + "/gantt.log", r'Total mtasks += 8')
|
||||||
|
# Predicted thread utilization should be less than 100%
|
||||||
|
test.file_grep_not(test.obj_dir + "/gantt.log", r'Thread utilization =\s*\d\d\d+\.\d+%')
|
||||||
|
else:
|
||||||
|
test.file_grep(test.obj_dir + "/gantt.log", r'Total threads += 1')
|
||||||
|
test.file_grep(test.obj_dir + "/gantt.log", r'Total mtasks += 0')
|
||||||
|
|
||||||
|
test.file_grep(test.obj_dir + "/gantt.log", r'\|\s+2\s+\|\s+2\.0+\s+\|\s+eval')
|
||||||
|
|
||||||
|
# Diff to itself, just to check parsing
|
||||||
|
test.vcd_identical(test.obj_dir + "/profile_exec.vcd", test.obj_dir + "/profile_exec.vcd")
|
||||||
|
|
||||||
|
test.passes()
|
@ -59,7 +59,7 @@ endmodule
|
|||||||
module Test (/*AUTOARG*/
|
module Test (/*AUTOARG*/
|
||||||
// Inputs
|
// Inputs
|
||||||
clk, in
|
clk, in
|
||||||
);
|
); /*verilator hier_block*/
|
||||||
input clk;
|
input clk;
|
||||||
input [9:0] in;
|
input [9:0] in;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user