Prep for multiple AstExecGraph. No functional change.

This commit is contained in:
Geza Lore 2022-04-10 11:37:41 +01:00
parent c79ea88576
commit fbd568dc47
11 changed files with 81 additions and 83 deletions

View File

@ -2034,6 +2034,11 @@ template <> inline bool AstNode::privateMayBeUnder<AstNodeAssign>(const AstNode*
// An AstVarScope may appear anywhere except under a statement or a math expression.
template <> inline bool AstNode::privateMayBeUnder<AstVarScope>(const AstNode* nodep) {
    // De Morgan form of: !NodeStmt && !NodeMath
    const bool excluded = VN_IS(nodep, NodeStmt) || VN_IS(nodep, NodeMath);
    return !excluded;
}
// An AstExecGraph must not nest under another AstExecGraph, and must sit
// directly under an AstCFunc rather than under a statement.
template <> inline bool AstNode::privateMayBeUnder<AstExecGraph>(const AstNode* nodep) {
    return !VN_IS(nodep, ExecGraph) && !VN_IS(nodep, NodeStmt);
}
inline std::ostream& operator<<(std::ostream& os, const AstNode* rhs) {
if (!rhs) {

View File

@ -225,9 +225,10 @@ AstNodeBiop* AstEqWild::newTyped(FileLine* fl, AstNode* lhsp, AstNode* rhsp) {
}
}
AstExecGraph::AstExecGraph(FileLine* fileline)
AstExecGraph::AstExecGraph(FileLine* fileline, const string& name)
: ASTGEN_SUPER_ExecGraph(fileline)
, m_depGraphp{new V3Graph} {}
, m_depGraphp{new V3Graph}
, m_name{name} {}
AstExecGraph::~AstExecGraph() { VL_DO_DANGLING(delete m_depGraphp, m_depGraphp); }

View File

@ -9199,27 +9199,30 @@ public:
class AstExecGraph final : public AstNode {
// For parallel execution, this node contains a dependency graph. Each
// node in the graph is an ExecMTask, which contains a body for the
// mtask, which contains a set of AstActive's, each of which calls a
// leaf AstCFunc. whew!
// vertex in the graph is an ExecMTask, which contains a body for the
// mtask (an AstMTaskBody), which contains sequentially executed statements.
//
// The mtask bodies are also children of this node, so we can visit
// them without traversing the graph (it's not always needed to
// traverse the graph.)
// The AstMTaskBody nodes are also children of this node, so we can visit
// them without traversing the graph.
private:
V3Graph* const m_depGraphp; // contains ExecMTask's
V3Graph* const m_depGraphp; // contains ExecMTask vertices
const string m_name; // Name of this AstExecGraph (for uniqueness at code generation)
public:
explicit AstExecGraph(FileLine* fl);
explicit AstExecGraph(FileLine* fl, const string& name);
ASTNODE_NODE_FUNCS_NO_DTOR(ExecGraph)
virtual ~AstExecGraph() override;
virtual const char* broken() const override {
BROKEN_RTN(!m_depGraphp);
return nullptr;
}
virtual string name() const override { return m_name; }
V3Graph* depGraphp() { return m_depGraphp; }
const V3Graph* depGraphp() const { return m_depGraphp; }
V3Graph* mutableDepGraphp() { return m_depGraphp; }
void addMTaskBody(AstMTaskBody* bodyp) { addOp1p(bodyp); }
// op1: The mtask bodies
AstMTaskBody* mTaskBodiesp() const { return VN_AS(op1p(), MTaskBody); }
void addMTaskBodyp(AstMTaskBody* bodyp) { addOp1p(bodyp); }
// op2: In later phases, the statements that start the parallel execution
void addStmtsp(AstNode* stmtp) { addOp2p(stmtp); }
};
@ -9319,13 +9322,15 @@ private:
AstConstPool* const m_constPoolp; // Reference to constant pool, for faster lookup
AstPackage* m_dollarUnitPkgp = nullptr; // $unit
AstCFunc* m_evalp = nullptr; // The '_eval' function
AstExecGraph* m_execGraphp = nullptr; // Execution MTask graph for threads>1 mode
AstVarScope* m_dpiExportTriggerp = nullptr; // The DPI export trigger variable
AstTopScope* m_topScopep = nullptr; // The singleton AstTopScope under the top module
VTimescale m_timeunit; // Global time unit
VTimescale m_timeprecision; // Global time precision
bool m_changeRequest = false; // Have _change_request method
bool m_timescaleSpecified = false; // Input HDL specified timescale
uint32_t m_nextFreeMTaskID = 1; // Next unique MTask ID within netlist
// starts at 1 so 0 means no MTask ID
uint32_t m_nextFreeMTaskProfilingID = 0; // Next unique ID to use for PGO
public:
AstNetlist();
ASTNODE_NODE_FUNCS(Netlist)
@ -9369,8 +9374,6 @@ public:
}
AstCFunc* evalp() const { return m_evalp; }
void evalp(AstCFunc* evalp) { m_evalp = evalp; }
AstExecGraph* execGraphp() const { return m_execGraphp; }
void execGraphp(AstExecGraph* graphp) { m_execGraphp = graphp; }
AstVarScope* dpiExportTriggerp() const { return m_dpiExportTriggerp; }
void dpiExportTriggerp(AstVarScope* varScopep) { m_dpiExportTriggerp = varScopep; }
AstTopScope* topScopep() const { return m_topScopep; }
@ -9390,6 +9393,9 @@ public:
void timeprecisionMerge(FileLine*, const VTimescale& value);
void timescaleSpecified(bool specified) { m_timescaleSpecified = specified; }
bool timescaleSpecified() const { return m_timescaleSpecified; }
uint32_t allocNextMTaskID() { return m_nextFreeMTaskID++; }
uint32_t allocNextMTaskProfilingID() { return m_nextFreeMTaskProfilingID++; }
uint32_t usedMTaskProfilingIDs() const { return m_nextFreeMTaskProfilingID; }
};
//######################################################################

View File

@ -411,7 +411,8 @@ private:
}
}
virtual void visit(AstExecGraph* nodep) override {
for (m_mtaskBodyp = VN_AS(nodep->op1p(), MTaskBody); m_mtaskBodyp;
VL_RESTORER(m_mtaskBodyp);
for (m_mtaskBodyp = nodep->mTaskBodiesp(); m_mtaskBodyp;
m_mtaskBodyp = VN_AS(m_mtaskBodyp->nextp(), MTaskBody)) {
clearLastSen();
iterate(m_mtaskBodyp);

View File

@ -1202,11 +1202,9 @@ public:
emitVarReset(varp);
}
virtual void visit(AstExecGraph* nodep) override {
UASSERT_OBJ(nodep == v3Global.rootp()->execGraphp(), nodep,
"ExecGraph should be a singleton!");
// The location of the AstExecGraph within the containing _eval()
// function is where we want to invoke the graph and wait for it to
// complete. Emitting the children does just that.
// The location of the AstExecGraph within the containing AstCFunc is where we want to
// invoke the graph and wait for it to complete. Emitting the children does just that.
UASSERT_OBJ(!nodep->mTaskBodiesp(), nodep, "These should have been lowered");
iterateChildrenConst(nodep);
}
virtual void visit(AstChangeDet* nodep) override { //

View File

@ -476,18 +476,8 @@ void EmitCSyms::emitSymHdr() {
if (v3Global.opt.profPgo()) {
puts("\n// PGO PROFILING\n");
uint64_t maxProfilerId = 0;
if (v3Global.opt.mtasks()) {
for (const V3GraphVertex* vxp
= v3Global.rootp()->execGraphp()->depGraphp()->verticesBeginp();
vxp; vxp = vxp->verticesNextp()) {
const ExecMTask* const mtp
= dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
if (maxProfilerId < mtp->profilerId()) maxProfilerId = mtp->profilerId();
}
}
++maxProfilerId; // As size must include 0
puts("VlPgoProfiler<" + cvtToStr(maxProfilerId) + "> _vm_pgoProfiler;\n");
const uint32_t usedMTaskProfilingIDs = v3Global.rootp()->usedMTaskProfilingIDs();
puts("VlPgoProfiler<" + cvtToStr(usedMTaskProfilingIDs) + "> _vm_pgoProfiler;\n");
}
if (!m_scopeNames.empty()) { // Scope names
@ -743,13 +733,15 @@ void EmitCSyms::emitSymImp() {
if (v3Global.opt.profPgo()) {
puts("// Configure profiling for PGO\n");
if (v3Global.opt.mtasks()) {
for (const V3GraphVertex* vxp
= v3Global.rootp()->execGraphp()->depGraphp()->verticesBeginp();
vxp; vxp = vxp->verticesNextp()) {
ExecMTask* const mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
puts("_vm_pgoProfiler.addCounter(" + cvtToStr(mtp->profilerId()) + ", \""
+ mtp->hashName() + "\");\n");
}
v3Global.rootp()->topModulep()->foreach<AstExecGraph>(
[&](const AstExecGraph* execGraphp) {
for (const V3GraphVertex* vxp = execGraphp->depGraphp()->verticesBeginp(); vxp;
vxp = vxp->verticesNextp()) {
const ExecMTask* const mtp = static_cast<const ExecMTask*>(vxp);
puts("_vm_pgoProfiler.addCounter(" + cvtToStr(mtp->profilerId()) + ", \""
+ mtp->hashName() + "\");\n");
}
});
}
}

View File

@ -315,6 +315,7 @@ private:
}
virtual void visit(AstExecGraph* nodep) override {
// Treat the ExecGraph like a call to each mtask body
UASSERT_OBJ(!m_mtasksGraphp, nodep, "Cannot handle more than one AstExecGraph");
m_mtasksGraphp = nodep->depGraphp();
for (V3GraphVertex* mtaskVxp = m_mtasksGraphp->verticesBeginp(); mtaskVxp;
mtaskVxp = mtaskVxp->verticesNextp()) {

View File

@ -1954,9 +1954,8 @@ void OrderProcess::processMTasks() {
// Create the AstExecGraph node which represents the execution
// of the MTask graph.
FileLine* const rootFlp = v3Global.rootp()->fileline();
AstExecGraph* const execGraphp = new AstExecGraph(rootFlp);
AstExecGraph* const execGraphp = new AstExecGraph{rootFlp, "eval"};
m_scopetop.addActivep(execGraphp);
v3Global.rootp()->execGraphp(execGraphp);
// Create CFuncs and bodies for each MTask.
GraphStream<MTaskVxIdLessThan> emit_mtasks(&mtasks);
@ -1994,7 +1993,8 @@ void OrderProcess::processMTasks() {
// and OrderLogicVertex's which are ephemeral to V3Order.
// - The ExecMTask graph and the AstMTaskBody's produced here
// persist until code generation time.
state.m_execMTaskp = new ExecMTask(execGraphp->mutableDepGraphp(), bodyp, mtaskp->id());
V3Graph* const depGraphp = execGraphp->depGraphp();
state.m_execMTaskp = new ExecMTask(depGraphp, bodyp, mtaskp->id());
// Cross-link each ExecMTask and MTaskBody
// Q: Why even have two objects?
// A: One is an AstNode, the other is a GraphVertex,
@ -2005,10 +2005,9 @@ void OrderProcess::processMTasks() {
const AbstractLogicMTask* const fromp
= dynamic_cast<const AbstractLogicMTask*>(fromVxp);
const MTaskState& fromState = mtaskStates[fromp->id()];
new V3GraphEdge(execGraphp->mutableDepGraphp(), fromState.m_execMTaskp,
state.m_execMTaskp, 1);
new V3GraphEdge(depGraphp, fromState.m_execMTaskp, state.m_execMTaskp, 1);
}
execGraphp->addMTaskBody(bodyp);
execGraphp->addMTaskBodyp(bodyp);
}
}

View File

@ -2112,8 +2112,8 @@ private:
ThreadSchedule& operator=(ThreadSchedule&&) = default;
// Debugging
void dumpDotFile(const string& filename) const;
void dumpDotFilePrefixedAlways(const string& nameComment) const;
void dumpDotFile(const V3Graph& graph, const string& filename) const;
void dumpDotFilePrefixedAlways(const V3Graph& graph, const string& nameComment) const;
public:
// Returns the number of cross-thread dependencies of the given MTask. If > 0, the MTask must
@ -2137,15 +2137,15 @@ public:
};
//! Variant of dumpDotFilePrefixed without --dump option check
void ThreadSchedule::dumpDotFilePrefixedAlways(const string& nameComment) const {
dumpDotFile(v3Global.debugFilename(nameComment) + ".dot");
void ThreadSchedule::dumpDotFilePrefixedAlways(const V3Graph& graph,
const string& nameComment) const {
dumpDotFile(graph, v3Global.debugFilename(nameComment) + ".dot");
}
void ThreadSchedule::dumpDotFile(const string& filename) const {
void ThreadSchedule::dumpDotFile(const V3Graph& graph, const string& filename) const {
// This generates a file used by graphviz, https://www.graphviz.org
const std::unique_ptr<std::ofstream> logp{V3File::new_ofstream(filename)};
if (logp->fail()) v3fatal("Can't write " << filename);
auto* const depGraph = v3Global.rootp()->execGraphp()->depGraphp();
// Header
*logp << "digraph v3graph {\n";
@ -2166,7 +2166,7 @@ void ThreadSchedule::dumpDotFile(const string& filename) const {
// Find minimum cost MTask for scaling MTask node widths
uint32_t minCost = UINT32_MAX;
for (const V3GraphVertex* vxp = depGraph->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
for (const V3GraphVertex* vxp = graph.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
if (const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp)) {
minCost = minCost > mtaskp->cost() ? mtaskp->cost() : minCost;
}
@ -2189,13 +2189,13 @@ void ThreadSchedule::dumpDotFile(const string& filename) const {
};
// Emit MTasks
for (const V3GraphVertex* vxp = depGraph->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
for (const V3GraphVertex* vxp = graph.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
if (const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp)) emitMTask(mtaskp);
}
// Emit MTask dependency edges
*logp << "\n // MTask dependencies\n";
for (const V3GraphVertex* vxp = depGraph->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
for (const V3GraphVertex* vxp = graph.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
if (const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp)) {
for (V3GraphEdge* edgep = mtaskp->outBeginp(); edgep; edgep = edgep->outNextp()) {
const V3GraphVertex* const top = edgep->top();
@ -2382,7 +2382,7 @@ public:
}
}
if (debug() >= 4) schedule.dumpDotFilePrefixedAlways("schedule");
if (debug() >= 4) schedule.dumpDotFilePrefixedAlways(mtaskGraph, "schedule");
return schedule;
}
@ -2659,15 +2659,14 @@ void V3Partition::go(V3Graph* mtasksp) {
LogicMTask* const mtaskp = dynamic_cast<LogicMTask*>(itp);
sorted.insert(mtaskp);
}
uint32_t nextId = 1;
for (auto it = sorted.begin(); it != sorted.end(); ++it) {
// We shouldn't perturb the sort order of the set, despite
// changing the IDs, they should all just remain in the same
// relative order. Confirm that:
const uint32_t nextId = v3Global.rootp()->allocNextMTaskID();
UASSERT(nextId <= (*it)->id(), "Should only shrink MTaskIDs here");
UINFO(4, "Reassigning MTask id " << (*it)->id() << " to id " << nextId << "\n");
(*it)->id(nextId);
++nextId;
}
}
@ -2868,11 +2867,8 @@ static void finalizeCosts(V3Graph* execMTaskGraphp) {
}
// Assign profiler IDs
uint64_t profilerId = 0;
for (const V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp;
vxp = vxp->verticesNextp()) {
ExecMTask* const mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
mtp->profilerId(profilerId++);
for (V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
static_cast<ExecMTask*>(vxp)->profilerId(v3Global.rootp()->allocNextMTaskProfilingID());
}
// Removing tasks may cause edges that were formerly non-transitive to
@ -2961,7 +2957,8 @@ static void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t th
}
}
static const std::vector<AstCFunc*> createThreadFunctions(const ThreadSchedule& schedule) {
static const std::vector<AstCFunc*> createThreadFunctions(const ThreadSchedule& schedule,
const string& tag) {
AstNodeModule* const modp = v3Global.rootp()->topModulep();
FileLine* const fl = modp->fileline();
@ -2971,8 +2968,7 @@ static const std::vector<AstCFunc*> createThreadFunctions(const ThreadSchedule&
for (const std::vector<const ExecMTask*>& thread : schedule.threads) {
if (thread.empty()) continue;
const uint32_t threadId = schedule.threadId(thread.front());
string name = "__Vthread_";
name += cvtToStr(threadId);
const string name{"__Vthread__" + tag + "__" + cvtToStr(threadId)};
AstCFunc* const funcp = new AstCFunc(fl, name, nullptr, "void");
modp->addStmtp(funcp);
funcps.push_back(funcp);
@ -3048,32 +3044,31 @@ static void implementExecGraph(AstExecGraph* const execGraphp) {
// Schedule the mtasks: statically associate each mtask with a thread,
// and determine the order in which each thread will run its mtasks.
const ThreadSchedule& schedule = PartPackMTasks().pack(*execGraphp->mutableDepGraphp());
const ThreadSchedule& schedule = PartPackMTasks().pack(*execGraphp->depGraphp());
// Create a function to be run by each thread. Note this moves all AstMTaskBody nodes from the
// AstExecGraph into the AstCFunc created
const std::vector<AstCFunc*>& funcps = createThreadFunctions(schedule);
const std::vector<AstCFunc*>& funcps = createThreadFunctions(schedule, execGraphp->name());
UASSERT(!funcps.empty(), "Non-empty ExecGraph yields no threads?");
// Start the thread functions at the point this AstExecGraph is located in the tree.
addThreadStartToExecGraph(execGraphp, funcps);
}
void V3Partition::finalize() {
void V3Partition::finalize(AstNetlist* netlistp) {
// Called by Verilator top stage
AstExecGraph* const execGraphp = v3Global.rootp()->execGraphp();
UASSERT(execGraphp, "Couldn't find AstExecGraph singleton.");
netlistp->topModulep()->foreach<AstExecGraph>([&](AstExecGraph* execGraphp) {
// Back in V3Order, we partitioned mtasks using provisional cost
// estimates. However, V3Order precedes some optimizations (notably
// V3LifePost) that can change the cost of logic within each mtask.
// Now that logic is final, recompute the cost and priority of each
// ExecMTask.
fillinCosts(execGraphp->depGraphp());
finalizeCosts(execGraphp->depGraphp());
// Back in V3Order, we partitioned mtasks using provisional cost
// estimates. However, V3Order precedes some optimizations (notably
// V3LifePost) that can change the cost of logic within each mtask.
// Now that logic is final, recompute the cost and priority of each
// ExecMTask.
fillinCosts(execGraphp->mutableDepGraphp());
finalizeCosts(execGraphp->mutableDepGraphp());
// Replace the graph body with its multi-threaded implementation.
implementExecGraph(execGraphp);
// Replace the graph body with its multi-threaded implementation.
implementExecGraph(execGraphp);
});
}
void V3Partition::selfTest() {

View File

@ -62,7 +62,7 @@ public:
// Operate on the final ExecMTask graph, immediately prior to code
// generation time.
static void finalize();
static void finalize(AstNetlist* netlistp);
private:
static void setupMTaskDeps(V3Graph* mtasksp, const Vx2MTaskMap* vx2mtaskp);

View File

@ -503,7 +503,7 @@ static void process() {
// threads. Must happen pre-EmitC which relies on the packing
// order. Must happen post-V3LifePost which changes the relative
// costs of mtasks.
V3Partition::finalize();
V3Partition::finalize(v3Global.rootp());
}
if (!v3Global.opt.lintOnly() && !v3Global.opt.xmlOnly() && !v3Global.opt.dpiHdrOnly()) {