Fix verilator_gantt for hierarchically Verilated models (#5700)

2024-12-28 18:27:34 +00:00 · 2024-12-23 16:10:46 +01:00 · 2024-12-23 16:10:46 +01:00 · 72a47e16c1
commit 72a47e16c1
parent 530ebecfb7
6 changed files with 81 additions and 18 deletions
--- a/bin/verilator_gantt
+++ b/bin/verilator_gantt
@ -36,7 +36,7 @@ def read_data(filename):
        re_proc_cpu = re.compile(r'VLPROFPROC processor\s*:\s*(\d+)\s*$')
        re_proc_dat = re.compile(r'VLPROFPROC ([a-z_ ]+)\s*:\s*(.*)$')
        cpu = None
-        thread = None
+        thread = 0
        execGraphStart = None
        global LongestVcdStrValueLength
@ -54,11 +54,11 @@ def read_data(filename):
                if kind == "SECTION_PUSH":
                    LongestVcdStrValueLength = max(LongestVcdStrValueLength, len(payload))
                    SectionStack.append(payload)
-                    Sections.append((tick, tuple(SectionStack)))
+                    Sections[thread].append((tick, tuple(SectionStack)))
                elif kind == "SECTION_POP":
                    assert SectionStack, "SECTION_POP without SECTION_PUSH"
                    SectionStack.pop()
-                    Sections.append((tick, tuple(SectionStack)))
+                    Sections[thread].append((tick, tuple(SectionStack)))
                elif kind == "MTASK_BEGIN":
                    mtask, predict_start, ecpu = re_payload_mtaskBegin.match(payload).groups()
                    mtask = int(mtask)
@ -97,6 +97,7 @@ def read_data(filename):
                    print("-Unknown execution trace record: %s" % line)
            elif re_thread.match(line):
                thread = int(re_thread.match(line).group(1))
                Sections.append([])
            elif re.match(r'^VLPROF(THREAD|VERSION)', line):
                pass
            elif re_arg1.match(line):
@ -307,23 +308,27 @@ def report_cpus():
 def report_sections():
-    if not Sections:
+    for thread, section in enumerate(Sections):
-        return
+        if section:
-    print("\nSection profile:")
+            print(f"\nSection profile for thread {thread}:")
            report_section(section)
 def report_section(section):
    totalTime = collections.defaultdict(lambda: 0)
    selfTime = collections.defaultdict(lambda: 0)
    sectionTree = [0, {}, 1]  # [selfTime, childTrees, numberOfTimesEntered]
    prevTime = 0
    prevStack = ()
-    for time, stack in Sections:
+    for time, stack in section:
        if len(stack) > len(prevStack):
            scope = sectionTree
            for item in stack:
                scope = scope[1].setdefault(item, [0, {}, 0])
            scope[2] += 1
        dt = time - prevTime
        assert dt >= 0
        scope = sectionTree
        for item in prevStack:
            scope = scope[1].setdefault(item, [0, {}, 0])
@ -457,12 +462,13 @@ def write_vcd(filename):
                addValue(pcode, time, value)
        # Section graph
-        if Sections:
+        for thread, section in enumerate(Sections):
-            scode = getCode(LongestVcdStrValueLength * 8, "section", "trace")
+            if section:
-            dcode = getCode(32, "section", "depth")
+                scode = getCode(LongestVcdStrValueLength * 8, "section", f"t{thread}_trace")
-            for time, stack in Sections:
+                dcode = getCode(32, "section", f"t{thread}_depth")
-                addValue(scode, time, stack[-1] if stack else None)
+                for time, stack in section:
-                addValue(dcode, time, len(stack))
+                    addValue(scode, time, stack[-1] if stack else None)
                    addValue(dcode, time, len(stack))
        # Create output file
        fh.write("$version Generated by verilator_gantt $end\n")
--- a/src/V3EmitCModel.cpp
+++ b/src/V3EmitCModel.cpp
@ -432,7 +432,8 @@ class EmitCModel final : public EmitCFunc {
        puts(topModNameProtected + "__" + protect("_eval_settle") + "(&(vlSymsp->TOP));\n");
        puts("}\n");
-        if (v3Global.opt.profExec()) puts("vlSymsp->__Vm_executionProfilerp->configure();\n");
+        if (v3Global.opt.profExec() && !v3Global.opt.hierChild())
            puts("vlSymsp->__Vm_executionProfilerp->configure();\n");
        puts("VL_DEBUG_IF(VL_DBG_MSGF(\"+ Eval\\n\"););\n");
        puts(topModNameProtected + "__" + protect("_eval") + "(&(vlSymsp->TOP));\n");
--- a/src/V3Order.cpp
+++ b/src/V3Order.cpp
@ -123,8 +123,10 @@ AstCFunc* V3Order::order(AstNetlist* netlistp,  //
    }();
    if (v3Global.opt.profExec()) {
-        funcp->addStmtsp(new AstCStmt{flp, "VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\"func "
+        const string name
-                                               + tag + "\");\n"});
+            = (v3Global.opt.hierChild() ? (v3Global.opt.topModule() + " ") : "") + "func " + tag;
        funcp->addStmtsp(new AstCStmt{flp, "VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\""
                                               + name + "\");\n"});
    }
    // Build the OrderGraph
--- a/src/V3Sched.cpp
+++ b/src/V3Sched.cpp
@ -156,7 +156,9 @@ AstNodeStmt* checkIterationLimit(AstNetlist* netlistp, const string& name, AstVa
    return ifp;
 }
-AstNodeStmt* profExecSectionPush(FileLine* flp, const string& name) {
+AstNodeStmt* profExecSectionPush(FileLine* flp, const string& section) {
    const string name
        = (v3Global.opt.hierChild() ? (v3Global.opt.topModule() + " ") : "") + section;
    return new AstCStmt{flp, "VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\"" + name + "\");\n"};
 }
--- a/test_regress/t/t_gantt_hier.py
+++ b/test_regress/t/t_gantt_hier.py
@ -0,0 +1,52 @@
 #!/usr/bin/env python3
 # DESCRIPTION: Verilator: Verilog Test driver/expect definition
 #
 # Copyright 2024 by Wilson Snyder. This program is free software; you
 # can redistribute it and/or modify it under the terms of either the GNU
 # Lesser General Public License Version 3 or the Perl Artistic License
 # Version 2.0.
 # SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
 # Test for bin/verilator_gantt on a hierarchically Verilated model (--hierarchical)
 import vltest_bootstrap
 test.scenarios('vlt_all')
 test.top_filename = "t/t_gen_alw.v"  # Any design works, as long as it runs for a few cycles
 test.compile(
    v_flags2=["--prof-exec", "--hierarchical"],
    # Checks below care about thread count, so use 2 (minimum reasonable)
    threads=(2 if test.vltmt else 1))
 test.execute(all_run_flags=[
    "+verilator+prof+exec+start+2",
    " +verilator+prof+exec+window+2",
    " +verilator+prof+exec+file+" + test.obj_dir + "/profile_exec.dat",
    " +verilator+prof+vlt+file+" + test.obj_dir + "/profile.vlt"])  # yapf:disable
 # For now, verilator_gantt still reads from STDIN
 #  (probably it should take a file, gantt.dat like verilator_profcfunc)
 # The profiling data still goes direct to the runtime's STDOUT
 #  (maybe that should go to a separate file - gantt.dat?)
 test.run(cmd=[
    os.environ["VERILATOR_ROOT"] + "/bin/verilator_gantt", test.obj_dir +
    "/profile_exec.dat", "--vcd " + test.obj_dir + "/profile_exec.vcd", "| tee " + test.obj_dir +
    "/gantt.log"
 ])
 if test.vltmt:
    test.file_grep(test.obj_dir + "/gantt.log", r'Total threads += 2')
    test.file_grep(test.obj_dir + "/gantt.log", r'Total mtasks += 8')
    # Predicted thread utilization should be less than 100%
    test.file_grep_not(test.obj_dir + "/gantt.log", r'Thread utilization =\s*\d\d\d+\.\d+%')
 else:
    test.file_grep(test.obj_dir + "/gantt.log", r'Total threads += 1')
    test.file_grep(test.obj_dir + "/gantt.log", r'Total mtasks += 0')
 test.file_grep(test.obj_dir + "/gantt.log", r'\|\s+2\s+\|\s+2\.0+\s+\|\s+eval')
 # Compare the VCD against itself, just to check that it parses
 test.vcd_identical(test.obj_dir + "/profile_exec.vcd", test.obj_dir + "/profile_exec.vcd")
 test.passes()
--- a/test_regress/t/t_gen_alw.v
+++ b/test_regress/t/t_gen_alw.v
@ -59,7 +59,7 @@ endmodule
 module Test (/*AUTOARG*/
   // Inputs
   clk, in
-   );
+   ); /*verilator hier_block*/
   input clk;
   input [9:0] in;