diff --git a/src/V3Ast.h b/src/V3Ast.h index 9665245dc..1e717b6e5 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -2034,6 +2034,11 @@ template <> inline bool AstNode::privateMayBeUnder(const AstNode* template <> inline bool AstNode::privateMayBeUnder(const AstNode* nodep) { return !VN_IS(nodep, NodeStmt) && !VN_IS(nodep, NodeMath); } +template <> inline bool AstNode::privateMayBeUnder<AstExecGraph>(const AstNode* nodep) { + if (VN_IS(nodep, ExecGraph)) return false; // Should not nest + if (VN_IS(nodep, NodeStmt)) return false; // Should be directly under CFunc + return true; +} inline std::ostream& operator<<(std::ostream& os, const AstNode* rhs) { if (!rhs) { diff --git a/src/V3AstNodes.cpp b/src/V3AstNodes.cpp index 14b15428a..08bcbd234 100644 --- a/src/V3AstNodes.cpp +++ b/src/V3AstNodes.cpp @@ -225,9 +225,10 @@ AstNodeBiop* AstEqWild::newTyped(FileLine* fl, AstNode* lhsp, AstNode* rhsp) { } } -AstExecGraph::AstExecGraph(FileLine* fileline) +AstExecGraph::AstExecGraph(FileLine* fileline, const string& name) : ASTGEN_SUPER_ExecGraph(fileline) - , m_depGraphp{new V3Graph} {} + , m_depGraphp{new V3Graph} + , m_name{name} {} AstExecGraph::~AstExecGraph() { VL_DO_DANGLING(delete m_depGraphp, m_depGraphp); } diff --git a/src/V3AstNodes.h b/src/V3AstNodes.h index 962f0c1fc..e4ba5a4c0 100644 --- a/src/V3AstNodes.h +++ b/src/V3AstNodes.h @@ -9199,27 +9199,30 @@ public: class AstExecGraph final : public AstNode { // For parallel execution, this node contains a dependency graph. Each - // node in the graph is an ExecMTask, which contains a body for the - // mtask, which contains a set of AstActive's, each of which calls a - // leaf AstCFunc. whew! + // vertex in the graph is an ExecMTask, which contains a body for the + // mtask (an AstMTaskBody), which contains sequentially executed statements. // - // The mtask bodies are also children of this node, so we can visit - // them without traversing the graph (it's not always needed to - // traverse the graph.) 
+ // The AstMTaskBody nodes are also children of this node, so we can visit + // them without traversing the graph. private: - V3Graph* const m_depGraphp; // contains ExecMTask's + V3Graph* const m_depGraphp; // contains ExecMTask vertices + const string m_name; // Name of this AstExecGraph (for uniqueness at code generation) public: - explicit AstExecGraph(FileLine* fl); + explicit AstExecGraph(FileLine* fl, const string& name); ASTNODE_NODE_FUNCS_NO_DTOR(ExecGraph) virtual ~AstExecGraph() override; virtual const char* broken() const override { BROKEN_RTN(!m_depGraphp); return nullptr; } + virtual string name() const override { return m_name; } + V3Graph* depGraphp() { return m_depGraphp; } const V3Graph* depGraphp() const { return m_depGraphp; } - V3Graph* mutableDepGraphp() { return m_depGraphp; } - void addMTaskBody(AstMTaskBody* bodyp) { addOp1p(bodyp); } + // op1: The mtask bodies + AstMTaskBody* mTaskBodiesp() const { return VN_AS(op1p(), MTaskBody); } + void addMTaskBodyp(AstMTaskBody* bodyp) { addOp1p(bodyp); } + // op2: In later phases, the statements that start the parallel execution void addStmtsp(AstNode* stmtp) { addOp2p(stmtp); } }; @@ -9319,13 +9322,15 @@ private: AstConstPool* const m_constPoolp; // Reference to constant pool, for faster lookup AstPackage* m_dollarUnitPkgp = nullptr; // $unit AstCFunc* m_evalp = nullptr; // The '_eval' function - AstExecGraph* m_execGraphp = nullptr; // Execution MTask graph for threads>1 mode AstVarScope* m_dpiExportTriggerp = nullptr; // The DPI export trigger variable AstTopScope* m_topScopep = nullptr; // The singleton AstTopScope under the top module VTimescale m_timeunit; // Global time unit VTimescale m_timeprecision; // Global time precision bool m_changeRequest = false; // Have _change_request method bool m_timescaleSpecified = false; // Input HDL specified timescale + uint32_t m_nextFreeMTaskID = 1; // Next unique MTask ID within netlist + // starts at 1 so 0 means no MTask ID + uint32_t 
m_nextFreeMTaskProfilingID = 0; // Next unique ID to use for PGO public: AstNetlist(); ASTNODE_NODE_FUNCS(Netlist) @@ -9369,8 +9374,6 @@ public: } AstCFunc* evalp() const { return m_evalp; } void evalp(AstCFunc* evalp) { m_evalp = evalp; } - AstExecGraph* execGraphp() const { return m_execGraphp; } - void execGraphp(AstExecGraph* graphp) { m_execGraphp = graphp; } AstVarScope* dpiExportTriggerp() const { return m_dpiExportTriggerp; } void dpiExportTriggerp(AstVarScope* varScopep) { m_dpiExportTriggerp = varScopep; } AstTopScope* topScopep() const { return m_topScopep; } @@ -9390,6 +9393,9 @@ public: void timeprecisionMerge(FileLine*, const VTimescale& value); void timescaleSpecified(bool specified) { m_timescaleSpecified = specified; } bool timescaleSpecified() const { return m_timescaleSpecified; } + uint32_t allocNextMTaskID() { return m_nextFreeMTaskID++; } + uint32_t allocNextMTaskProfilingID() { return m_nextFreeMTaskProfilingID++; } + uint32_t usedMTaskProfilingIDs() const { return m_nextFreeMTaskProfilingID; } }; //###################################################################### diff --git a/src/V3Clock.cpp b/src/V3Clock.cpp index 77b655963..a9072fe29 100644 --- a/src/V3Clock.cpp +++ b/src/V3Clock.cpp @@ -411,7 +411,8 @@ private: } } virtual void visit(AstExecGraph* nodep) override { - for (m_mtaskBodyp = VN_AS(nodep->op1p(), MTaskBody); m_mtaskBodyp; + VL_RESTORER(m_mtaskBodyp); + for (m_mtaskBodyp = nodep->mTaskBodiesp(); m_mtaskBodyp; m_mtaskBodyp = VN_AS(m_mtaskBodyp->nextp(), MTaskBody)) { clearLastSen(); iterate(m_mtaskBodyp); diff --git a/src/V3EmitCFunc.h b/src/V3EmitCFunc.h index 2a6b1189d..127bf1032 100644 --- a/src/V3EmitCFunc.h +++ b/src/V3EmitCFunc.h @@ -1202,11 +1202,9 @@ public: emitVarReset(varp); } virtual void visit(AstExecGraph* nodep) override { - UASSERT_OBJ(nodep == v3Global.rootp()->execGraphp(), nodep, - "ExecGraph should be a singleton!"); - // The location of the AstExecGraph within the containing _eval() - // function is 
where we want to invoke the graph and wait for it to - // complete. Emitting the children does just that. + // The location of the AstExecGraph within the containing AstCFunc is where we want to + // invoke the graph and wait for it to complete. Emitting the children does just that. + UASSERT_OBJ(!nodep->mTaskBodiesp(), nodep, "These should have been lowered"); iterateChildrenConst(nodep); } virtual void visit(AstChangeDet* nodep) override { // diff --git a/src/V3EmitCSyms.cpp b/src/V3EmitCSyms.cpp index 131d3a304..f9baa3fd5 100644 --- a/src/V3EmitCSyms.cpp +++ b/src/V3EmitCSyms.cpp @@ -476,18 +476,8 @@ void EmitCSyms::emitSymHdr() { if (v3Global.opt.profPgo()) { puts("\n// PGO PROFILING\n"); - uint64_t maxProfilerId = 0; - if (v3Global.opt.mtasks()) { - for (const V3GraphVertex* vxp - = v3Global.rootp()->execGraphp()->depGraphp()->verticesBeginp(); - vxp; vxp = vxp->verticesNextp()) { - const ExecMTask* const mtp - = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp)); - if (maxProfilerId < mtp->profilerId()) maxProfilerId = mtp->profilerId(); - } - } - ++maxProfilerId; // As size must include 0 - puts("VlPgoProfiler<" + cvtToStr(maxProfilerId) + "> _vm_pgoProfiler;\n"); + const uint32_t usedMTaskProfilingIDs = v3Global.rootp()->usedMTaskProfilingIDs(); + puts("VlPgoProfiler<" + cvtToStr(usedMTaskProfilingIDs) + "> _vm_pgoProfiler;\n"); } if (!m_scopeNames.empty()) { // Scope names @@ -743,13 +733,15 @@ void EmitCSyms::emitSymImp() { if (v3Global.opt.profPgo()) { puts("// Configure profiling for PGO\n"); if (v3Global.opt.mtasks()) { - for (const V3GraphVertex* vxp - = v3Global.rootp()->execGraphp()->depGraphp()->verticesBeginp(); - vxp; vxp = vxp->verticesNextp()) { - ExecMTask* const mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp)); - puts("_vm_pgoProfiler.addCounter(" + cvtToStr(mtp->profilerId()) + ", \"" - + mtp->hashName() + "\");\n"); - } + v3Global.rootp()->topModulep()->foreach( + [&](const AstExecGraph* execGraphp) { + for (const V3GraphVertex* vxp = 
execGraphp->depGraphp()->verticesBeginp(); vxp; + vxp = vxp->verticesNextp()) { + const ExecMTask* const mtp = static_cast<const ExecMTask*>(vxp); + puts("_vm_pgoProfiler.addCounter(" + cvtToStr(mtp->profilerId()) + ", \"" + + mtp->hashName() + "\");\n"); + } + }); } } diff --git a/src/V3LifePost.cpp b/src/V3LifePost.cpp index 07dbae5a5..14405ee3d 100644 --- a/src/V3LifePost.cpp +++ b/src/V3LifePost.cpp @@ -315,6 +315,7 @@ private: } virtual void visit(AstExecGraph* nodep) override { // Treat the ExecGraph like a call to each mtask body + UASSERT_OBJ(!m_mtasksGraphp, nodep, "Cannot handle more than one AstExecGraph"); m_mtasksGraphp = nodep->depGraphp(); for (V3GraphVertex* mtaskVxp = m_mtasksGraphp->verticesBeginp(); mtaskVxp; mtaskVxp = mtaskVxp->verticesNextp()) { diff --git a/src/V3Order.cpp b/src/V3Order.cpp index 529680624..090e81b7d 100644 --- a/src/V3Order.cpp +++ b/src/V3Order.cpp @@ -1954,9 +1954,8 @@ void OrderProcess::processMTasks() { // Create the AstExecGraph node which represents the execution // of the MTask graph. FileLine* const rootFlp = v3Global.rootp()->fileline(); - AstExecGraph* const execGraphp = new AstExecGraph(rootFlp); + AstExecGraph* const execGraphp = new AstExecGraph{rootFlp, "eval"}; m_scopetop.addActivep(execGraphp); - v3Global.rootp()->execGraphp(execGraphp); // Create CFuncs and bodies for each MTask. GraphStream emit_mtasks(&mtasks); @@ -1994,7 +1993,8 @@ void OrderProcess::processMTasks() { // and OrderLogicVertex's which are ephemeral to V3Order. // - The ExecMTask graph and the AstMTaskBody's produced here // persist until code generation time. - state.m_execMTaskp = new ExecMTask(execGraphp->mutableDepGraphp(), bodyp, mtaskp->id()); + V3Graph* const depGraphp = execGraphp->depGraphp(); + state.m_execMTaskp = new ExecMTask(depGraphp, bodyp, mtaskp->id()); // Cross-link each ExecMTask and MTaskBody // Q: Why even have two objects? 
// A: One is an AstNode, the other is a GraphVertex, @@ -2005,10 +2005,9 @@ void OrderProcess::processMTasks() { const AbstractLogicMTask* const fromp = dynamic_cast<const AbstractLogicMTask*>(fromVxp); const MTaskState& fromState = mtaskStates[fromp->id()]; - new V3GraphEdge(execGraphp->mutableDepGraphp(), fromState.m_execMTaskp, - state.m_execMTaskp, 1); + new V3GraphEdge(depGraphp, fromState.m_execMTaskp, state.m_execMTaskp, 1); } - execGraphp->addMTaskBody(bodyp); + execGraphp->addMTaskBodyp(bodyp); } } diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index 4c16c0aa3..18ca601d7 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -2112,8 +2112,8 @@ private: ThreadSchedule& operator=(ThreadSchedule&&) = default; // Debugging - void dumpDotFile(const string& filename) const; - void dumpDotFilePrefixedAlways(const string& nameComment) const; + void dumpDotFile(const V3Graph& graph, const string& filename) const; + void dumpDotFilePrefixedAlways(const V3Graph& graph, const string& nameComment) const; public: // Returns the number of cross-thread dependencies of the given MTask. If > 0, the MTask must @@ -2137,15 +2137,15 @@ public: }; //! 
Variant of dumpDotFilePrefixed without --dump option check -void ThreadSchedule::dumpDotFilePrefixedAlways(const string& nameComment) const { - dumpDotFile(v3Global.debugFilename(nameComment) + ".dot"); +void ThreadSchedule::dumpDotFilePrefixedAlways(const V3Graph& graph, + const string& nameComment) const { + dumpDotFile(graph, v3Global.debugFilename(nameComment) + ".dot"); } -void ThreadSchedule::dumpDotFile(const string& filename) const { +void ThreadSchedule::dumpDotFile(const V3Graph& graph, const string& filename) const { // This generates a file used by graphviz, https://www.graphviz.org const std::unique_ptr<std::ofstream> logp{V3File::new_ofstream(filename)}; if (logp->fail()) v3fatal("Can't write " << filename); - auto* const depGraph = v3Global.rootp()->execGraphp()->depGraphp(); // Header *logp << "digraph v3graph {\n"; @@ -2166,7 +2166,7 @@ void ThreadSchedule::dumpDotFile(const string& filename) const { // Find minimum cost MTask for scaling MTask node widths uint32_t minCost = UINT32_MAX; - for (const V3GraphVertex* vxp = depGraph->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { + for (const V3GraphVertex* vxp = graph.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { if (const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp)) { minCost = minCost > mtaskp->cost() ? 
mtaskp->cost() : minCost; } @@ -2189,13 +2189,13 @@ void ThreadSchedule::dumpDotFile(const string& filename) const { }; // Emit MTasks - for (const V3GraphVertex* vxp = depGraph->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { + for (const V3GraphVertex* vxp = graph.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { if (const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp)) emitMTask(mtaskp); } // Emit MTask dependency edges *logp << "\n // MTask dependencies\n"; - for (const V3GraphVertex* vxp = depGraph->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { + for (const V3GraphVertex* vxp = graph.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { if (const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp)) { for (V3GraphEdge* edgep = mtaskp->outBeginp(); edgep; edgep = edgep->outNextp()) { const V3GraphVertex* const top = edgep->top(); @@ -2382,7 +2382,7 @@ public: } } - if (debug() >= 4) schedule.dumpDotFilePrefixedAlways("schedule"); + if (debug() >= 4) schedule.dumpDotFilePrefixedAlways(mtaskGraph, "schedule"); return schedule; } @@ -2659,15 +2659,14 @@ void V3Partition::go(V3Graph* mtasksp) { LogicMTask* const mtaskp = dynamic_cast<LogicMTask*>(itp); sorted.insert(mtaskp); } - uint32_t nextId = 1; for (auto it = sorted.begin(); it != sorted.end(); ++it) { // We shouldn't perturb the sort order of the set, despite // changing the IDs, they should all just remain in the same // relative order. 
Confirm that: + const uint32_t nextId = v3Global.rootp()->allocNextMTaskID(); UASSERT(nextId <= (*it)->id(), "Should only shrink MTaskIDs here"); UINFO(4, "Reassigning MTask id " << (*it)->id() << " to id " << nextId << "\n"); (*it)->id(nextId); - ++nextId; } } @@ -2868,11 +2867,8 @@ static void finalizeCosts(V3Graph* execMTaskGraphp) { } // Assign profiler IDs - uint64_t profilerId = 0; - for (const V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp; - vxp = vxp->verticesNextp()) { - ExecMTask* const mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp)); - mtp->profilerId(profilerId++); + for (V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { + static_cast<ExecMTask*>(vxp)->profilerId(v3Global.rootp()->allocNextMTaskProfilingID()); } // Removing tasks may cause edges that were formerly non-transitive to @@ -2961,7 +2957,8 @@ static void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t th } } -static const std::vector<AstCFunc*> createThreadFunctions(const ThreadSchedule& schedule) { +static const std::vector<AstCFunc*> createThreadFunctions(const ThreadSchedule& schedule, + const string& tag) { AstNodeModule* const modp = v3Global.rootp()->topModulep(); FileLine* const fl = modp->fileline(); @@ -2971,8 +2968,7 @@ static const std::vector<AstCFunc*> createThreadFunctions(const ThreadSchedule& for (const std::vector<const ExecMTask*>& thread : schedule.threads) { if (thread.empty()) continue; const uint32_t threadId = schedule.threadId(thread.front()); - string name = "__Vthread_"; - name += cvtToStr(threadId); + const string name{"__Vthread__" + tag + "__" + cvtToStr(threadId)}; AstCFunc* const funcp = new AstCFunc(fl, name, nullptr, "void"); modp->addStmtp(funcp); funcps.push_back(funcp); @@ -3048,32 +3044,31 @@ static void implementExecGraph(AstExecGraph* const execGraphp) { // Schedule the mtasks: statically associate each mtask with a thread, // and determine the order in which each thread will runs its mtasks. 
- const ThreadSchedule& schedule = PartPackMTasks().pack(*execGraphp->mutableDepGraphp()); + const ThreadSchedule& schedule = PartPackMTasks().pack(*execGraphp->depGraphp()); // Create a function to be run by each thread. Note this moves all AstMTaskBody nodes form the // AstExecGrap into the AstCFunc created - const std::vector<AstCFunc*>& funcps = createThreadFunctions(schedule); + const std::vector<AstCFunc*>& funcps = createThreadFunctions(schedule, execGraphp->name()); UASSERT(!funcps.empty(), "Non-empty ExecGraph yields no threads?"); // Start the thread functions at the point this AstExecGraph is located in the tree. addThreadStartToExecGraph(execGraphp, funcps); } -void V3Partition::finalize() { +void V3Partition::finalize(AstNetlist* netlistp) { // Called by Verilator top stage - AstExecGraph* const execGraphp = v3Global.rootp()->execGraphp(); - UASSERT(execGraphp, "Couldn't find AstExecGraph singleton."); + netlistp->topModulep()->foreach([&](AstExecGraph* execGraphp) { + // Back in V3Order, we partitioned mtasks using provisional cost + // estimates. However, V3Order precedes some optimizations (notably + // V3LifePost) that can change the cost of logic within each mtask. + // Now that logic is final, recompute the cost and priority of each + // ExecMTask. + fillinCosts(execGraphp->depGraphp()); + finalizeCosts(execGraphp->depGraphp()); - // Back in V3Order, we partitioned mtasks using provisional cost - // estimates. However, V3Order precedes some optimizations (notably - // V3LifePost) that can change the cost of logic within each mtask. - // Now that logic is final, recompute the cost and priority of each - // ExecMTask. - fillinCosts(execGraphp->mutableDepGraphp()); - finalizeCosts(execGraphp->mutableDepGraphp()); - - // Replace the graph body with its multi-threaded implementation. 
+ implementExecGraph(execGraphp); + }); } void V3Partition::selfTest() { diff --git a/src/V3Partition.h b/src/V3Partition.h index 4ba4cc29e..c358599f4 100644 --- a/src/V3Partition.h +++ b/src/V3Partition.h @@ -62,7 +62,7 @@ public: // Operate on the final ExecMTask graph, immediately prior to code // generation time. - static void finalize(); + static void finalize(AstNetlist* netlistp); private: static void setupMTaskDeps(V3Graph* mtasksp, const Vx2MTaskMap* vx2mtaskp); diff --git a/src/Verilator.cpp b/src/Verilator.cpp index 7ba0c113a..e233a041c 100644 --- a/src/Verilator.cpp +++ b/src/Verilator.cpp @@ -503,7 +503,7 @@ static void process() { // threads. Must happen pre-EmitC which relies on the packing // order. Must happen post-V3LifePost which changes the relative // costs of mtasks. - V3Partition::finalize(); + V3Partition::finalize(v3Global.rootp()); } if (!v3Global.opt.lintOnly() && !v3Global.opt.xmlOnly() && !v3Global.opt.dpiHdrOnly()) {