forked from github/verilator
Prep for multiple AstExecGraph. No functional change.
This commit is contained in:
parent
c79ea88576
commit
fbd568dc47
@ -2034,6 +2034,11 @@ template <> inline bool AstNode::privateMayBeUnder<AstNodeAssign>(const AstNode*
|
||||
// Placement rule for AstVarScope: yields true unless 'nodep' is a NodeStmt or a NodeMath.
// NOTE(review): 'nodep' is presumably the prospective parent node - confirm against the caller.
template <> inline bool AstNode::privateMayBeUnder<AstVarScope>(const AstNode* nodep) {
|
||||
return !VN_IS(nodep, NodeStmt) && !VN_IS(nodep, NodeMath);
|
||||
}
|
||||
// Placement rule for AstExecGraph: yields false when 'nodep' is itself an ExecGraph or is a
// NodeStmt, and true for every other node type.
// NOTE(review): 'nodep' is presumably the prospective parent node - confirm against the caller.
template <> inline bool AstNode::privateMayBeUnder<AstExecGraph>(const AstNode* nodep) {
|
||||
if (VN_IS(nodep, ExecGraph)) return false; // Should not nest
|
||||
if (VN_IS(nodep, NodeStmt)) return false; // Should be directly under CFunc
|
||||
return true;
|
||||
}
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& os, const AstNode* rhs) {
|
||||
if (!rhs) {
|
||||
|
@ -225,9 +225,10 @@ AstNodeBiop* AstEqWild::newTyped(FileLine* fl, AstNode* lhsp, AstNode* rhsp) {
|
||||
}
|
||||
}
|
||||
|
||||
AstExecGraph::AstExecGraph(FileLine* fileline)
|
||||
AstExecGraph::AstExecGraph(FileLine* fileline, const string& name)
|
||||
: ASTGEN_SUPER_ExecGraph(fileline)
|
||||
, m_depGraphp{new V3Graph} {}
|
||||
, m_depGraphp{new V3Graph}
|
||||
, m_name{name} {}
|
||||
|
||||
// Deletes the V3Graph that the constructor allocated with 'new' into m_depGraphp.
// NOTE(review): VL_DO_DANGLING presumably also flags the pointer as dangling afterwards - confirm.
AstExecGraph::~AstExecGraph() { VL_DO_DANGLING(delete m_depGraphp, m_depGraphp); }
|
||||
|
||||
|
@ -9199,27 +9199,30 @@ public:
|
||||
|
||||
class AstExecGraph final : public AstNode {
|
||||
// For parallel execution, this node contains a dependency graph. Each
|
||||
// node in the graph is an ExecMTask, which contains a body for the
|
||||
// mtask, which contains a set of AstActive's, each of which calls a
|
||||
// leaf AstCFunc. whew!
|
||||
// vertex in the graph is an ExecMTask, which contains a body for the
|
||||
// mtask (an AstMTaskBody), which contains sequentially executed statements.
|
||||
//
|
||||
// The mtask bodies are also children of this node, so we can visit
|
||||
// them without traversing the graph (it's not always needed to
|
||||
// traverse the graph.)
|
||||
// The AstMTaskBody nodes are also children of this node, so we can visit
|
||||
// them without traversing the graph.
|
||||
private:
|
||||
V3Graph* const m_depGraphp; // contains ExecMTask's
|
||||
V3Graph* const m_depGraphp; // contains ExecMTask vertices
|
||||
const string m_name; // Name of this AstExecGraph (for uniqueness at code generation)
|
||||
|
||||
public:
|
||||
explicit AstExecGraph(FileLine* fl);
|
||||
explicit AstExecGraph(FileLine* fl, const string& name);
|
||||
ASTNODE_NODE_FUNCS_NO_DTOR(ExecGraph)
|
||||
virtual ~AstExecGraph() override;
|
||||
virtual const char* broken() const override {
|
||||
BROKEN_RTN(!m_depGraphp);
|
||||
return nullptr;
|
||||
}
|
||||
virtual string name() const override { return m_name; }
|
||||
V3Graph* depGraphp() { return m_depGraphp; }
|
||||
const V3Graph* depGraphp() const { return m_depGraphp; }
|
||||
V3Graph* mutableDepGraphp() { return m_depGraphp; }
|
||||
void addMTaskBody(AstMTaskBody* bodyp) { addOp1p(bodyp); }
|
||||
// op1: The mtask bodies
|
||||
AstMTaskBody* mTaskBodiesp() const { return VN_AS(op1p(), MTaskBody); }
|
||||
void addMTaskBodyp(AstMTaskBody* bodyp) { addOp1p(bodyp); }
|
||||
// op2: In later phases, the statements that start the parallel execution
|
||||
void addStmtsp(AstNode* stmtp) { addOp2p(stmtp); }
|
||||
};
|
||||
|
||||
@ -9319,13 +9322,15 @@ private:
|
||||
AstConstPool* const m_constPoolp; // Reference to constant pool, for faster lookup
|
||||
AstPackage* m_dollarUnitPkgp = nullptr; // $unit
|
||||
AstCFunc* m_evalp = nullptr; // The '_eval' function
|
||||
AstExecGraph* m_execGraphp = nullptr; // Execution MTask graph for threads>1 mode
|
||||
AstVarScope* m_dpiExportTriggerp = nullptr; // The DPI export trigger variable
|
||||
AstTopScope* m_topScopep = nullptr; // The singleton AstTopScope under the top module
|
||||
VTimescale m_timeunit; // Global time unit
|
||||
VTimescale m_timeprecision; // Global time precision
|
||||
bool m_changeRequest = false; // Have _change_request method
|
||||
bool m_timescaleSpecified = false; // Input HDL specified timescale
|
||||
uint32_t m_nextFreeMTaskID = 1; // Next unique MTask ID within netlist
|
||||
// starts at 1 so 0 means no MTask ID
|
||||
uint32_t m_nextFreeMTaskProfilingID = 0; // Next unique ID to use for PGO
|
||||
public:
|
||||
AstNetlist();
|
||||
ASTNODE_NODE_FUNCS(Netlist)
|
||||
@ -9369,8 +9374,6 @@ public:
|
||||
}
|
||||
AstCFunc* evalp() const { return m_evalp; }
|
||||
void evalp(AstCFunc* evalp) { m_evalp = evalp; }
|
||||
AstExecGraph* execGraphp() const { return m_execGraphp; }
|
||||
void execGraphp(AstExecGraph* graphp) { m_execGraphp = graphp; }
|
||||
AstVarScope* dpiExportTriggerp() const { return m_dpiExportTriggerp; }
|
||||
void dpiExportTriggerp(AstVarScope* varScopep) { m_dpiExportTriggerp = varScopep; }
|
||||
AstTopScope* topScopep() const { return m_topScopep; }
|
||||
@ -9390,6 +9393,9 @@ public:
|
||||
void timeprecisionMerge(FileLine*, const VTimescale& value);
|
||||
void timescaleSpecified(bool specified) { m_timescaleSpecified = specified; }
|
||||
bool timescaleSpecified() const { return m_timescaleSpecified; }
|
||||
uint32_t allocNextMTaskID() { return m_nextFreeMTaskID++; }
|
||||
uint32_t allocNextMTaskProfilingID() { return m_nextFreeMTaskProfilingID++; }
|
||||
uint32_t usedMTaskProfilingIDs() const { return m_nextFreeMTaskProfilingID; }
|
||||
};
|
||||
|
||||
//######################################################################
|
||||
|
@ -411,7 +411,8 @@ private:
|
||||
}
|
||||
}
|
||||
virtual void visit(AstExecGraph* nodep) override {
|
||||
for (m_mtaskBodyp = VN_AS(nodep->op1p(), MTaskBody); m_mtaskBodyp;
|
||||
VL_RESTORER(m_mtaskBodyp);
|
||||
for (m_mtaskBodyp = nodep->mTaskBodiesp(); m_mtaskBodyp;
|
||||
m_mtaskBodyp = VN_AS(m_mtaskBodyp->nextp(), MTaskBody)) {
|
||||
clearLastSen();
|
||||
iterate(m_mtaskBodyp);
|
||||
|
@ -1202,11 +1202,9 @@ public:
|
||||
emitVarReset(varp);
|
||||
}
|
||||
virtual void visit(AstExecGraph* nodep) override {
|
||||
UASSERT_OBJ(nodep == v3Global.rootp()->execGraphp(), nodep,
|
||||
"ExecGraph should be a singleton!");
|
||||
// The location of the AstExecGraph within the containing _eval()
|
||||
// function is where we want to invoke the graph and wait for it to
|
||||
// complete. Emitting the children does just that.
|
||||
// The location of the AstExecGraph within the containing AstCFunc is where we want to
|
||||
// invoke the graph and wait for it to complete. Emitting the children does just that.
|
||||
UASSERT_OBJ(!nodep->mTaskBodiesp(), nodep, "These should have been lowered");
|
||||
iterateChildrenConst(nodep);
|
||||
}
|
||||
virtual void visit(AstChangeDet* nodep) override { //
|
||||
|
@ -476,18 +476,8 @@ void EmitCSyms::emitSymHdr() {
|
||||
|
||||
if (v3Global.opt.profPgo()) {
|
||||
puts("\n// PGO PROFILING\n");
|
||||
uint64_t maxProfilerId = 0;
|
||||
if (v3Global.opt.mtasks()) {
|
||||
for (const V3GraphVertex* vxp
|
||||
= v3Global.rootp()->execGraphp()->depGraphp()->verticesBeginp();
|
||||
vxp; vxp = vxp->verticesNextp()) {
|
||||
const ExecMTask* const mtp
|
||||
= dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
|
||||
if (maxProfilerId < mtp->profilerId()) maxProfilerId = mtp->profilerId();
|
||||
}
|
||||
}
|
||||
++maxProfilerId; // As size must include 0
|
||||
puts("VlPgoProfiler<" + cvtToStr(maxProfilerId) + "> _vm_pgoProfiler;\n");
|
||||
const uint32_t usedMTaskProfilingIDs = v3Global.rootp()->usedMTaskProfilingIDs();
|
||||
puts("VlPgoProfiler<" + cvtToStr(usedMTaskProfilingIDs) + "> _vm_pgoProfiler;\n");
|
||||
}
|
||||
|
||||
if (!m_scopeNames.empty()) { // Scope names
|
||||
@ -743,13 +733,15 @@ void EmitCSyms::emitSymImp() {
|
||||
if (v3Global.opt.profPgo()) {
|
||||
puts("// Configure profiling for PGO\n");
|
||||
if (v3Global.opt.mtasks()) {
|
||||
for (const V3GraphVertex* vxp
|
||||
= v3Global.rootp()->execGraphp()->depGraphp()->verticesBeginp();
|
||||
vxp; vxp = vxp->verticesNextp()) {
|
||||
ExecMTask* const mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
|
||||
v3Global.rootp()->topModulep()->foreach<AstExecGraph>(
|
||||
[&](const AstExecGraph* execGraphp) {
|
||||
for (const V3GraphVertex* vxp = execGraphp->depGraphp()->verticesBeginp(); vxp;
|
||||
vxp = vxp->verticesNextp()) {
|
||||
const ExecMTask* const mtp = static_cast<const ExecMTask*>(vxp);
|
||||
puts("_vm_pgoProfiler.addCounter(" + cvtToStr(mtp->profilerId()) + ", \""
|
||||
+ mtp->hashName() + "\");\n");
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -315,6 +315,7 @@ private:
|
||||
}
|
||||
virtual void visit(AstExecGraph* nodep) override {
|
||||
// Treat the ExecGraph like a call to each mtask body
|
||||
UASSERT_OBJ(!m_mtasksGraphp, nodep, "Cannot handle more than one AstExecGraph");
|
||||
m_mtasksGraphp = nodep->depGraphp();
|
||||
for (V3GraphVertex* mtaskVxp = m_mtasksGraphp->verticesBeginp(); mtaskVxp;
|
||||
mtaskVxp = mtaskVxp->verticesNextp()) {
|
||||
|
@ -1954,9 +1954,8 @@ void OrderProcess::processMTasks() {
|
||||
// Create the AstExecGraph node which represents the execution
|
||||
// of the MTask graph.
|
||||
FileLine* const rootFlp = v3Global.rootp()->fileline();
|
||||
AstExecGraph* const execGraphp = new AstExecGraph(rootFlp);
|
||||
AstExecGraph* const execGraphp = new AstExecGraph{rootFlp, "eval"};
|
||||
m_scopetop.addActivep(execGraphp);
|
||||
v3Global.rootp()->execGraphp(execGraphp);
|
||||
|
||||
// Create CFuncs and bodies for each MTask.
|
||||
GraphStream<MTaskVxIdLessThan> emit_mtasks(&mtasks);
|
||||
@ -1994,7 +1993,8 @@ void OrderProcess::processMTasks() {
|
||||
// and OrderLogicVertex's which are ephemeral to V3Order.
|
||||
// - The ExecMTask graph and the AstMTaskBody's produced here
|
||||
// persist until code generation time.
|
||||
state.m_execMTaskp = new ExecMTask(execGraphp->mutableDepGraphp(), bodyp, mtaskp->id());
|
||||
V3Graph* const depGraphp = execGraphp->depGraphp();
|
||||
state.m_execMTaskp = new ExecMTask(depGraphp, bodyp, mtaskp->id());
|
||||
// Cross-link each ExecMTask and MTaskBody
|
||||
// Q: Why even have two objects?
|
||||
// A: One is an AstNode, the other is a GraphVertex,
|
||||
@ -2005,10 +2005,9 @@ void OrderProcess::processMTasks() {
|
||||
const AbstractLogicMTask* const fromp
|
||||
= dynamic_cast<const AbstractLogicMTask*>(fromVxp);
|
||||
const MTaskState& fromState = mtaskStates[fromp->id()];
|
||||
new V3GraphEdge(execGraphp->mutableDepGraphp(), fromState.m_execMTaskp,
|
||||
state.m_execMTaskp, 1);
|
||||
new V3GraphEdge(depGraphp, fromState.m_execMTaskp, state.m_execMTaskp, 1);
|
||||
}
|
||||
execGraphp->addMTaskBody(bodyp);
|
||||
execGraphp->addMTaskBodyp(bodyp);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2112,8 +2112,8 @@ private:
|
||||
ThreadSchedule& operator=(ThreadSchedule&&) = default;
|
||||
|
||||
// Debugging
|
||||
void dumpDotFile(const string& filename) const;
|
||||
void dumpDotFilePrefixedAlways(const string& nameComment) const;
|
||||
void dumpDotFile(const V3Graph& graph, const string& filename) const;
|
||||
void dumpDotFilePrefixedAlways(const V3Graph& graph, const string& nameComment) const;
|
||||
|
||||
public:
|
||||
// Returns the number of cross-thread dependencies of the given MTask. If > 0, the MTask must
|
||||
@ -2137,15 +2137,15 @@ public:
|
||||
};
|
||||
|
||||
//! Variant of dumpDotFilePrefixed without --dump option check
|
||||
void ThreadSchedule::dumpDotFilePrefixedAlways(const string& nameComment) const {
|
||||
dumpDotFile(v3Global.debugFilename(nameComment) + ".dot");
|
||||
void ThreadSchedule::dumpDotFilePrefixedAlways(const V3Graph& graph,
|
||||
const string& nameComment) const {
|
||||
dumpDotFile(graph, v3Global.debugFilename(nameComment) + ".dot");
|
||||
}
|
||||
|
||||
void ThreadSchedule::dumpDotFile(const string& filename) const {
|
||||
void ThreadSchedule::dumpDotFile(const V3Graph& graph, const string& filename) const {
|
||||
// This generates a file used by graphviz, https://www.graphviz.org
|
||||
const std::unique_ptr<std::ofstream> logp{V3File::new_ofstream(filename)};
|
||||
if (logp->fail()) v3fatal("Can't write " << filename);
|
||||
auto* const depGraph = v3Global.rootp()->execGraphp()->depGraphp();
|
||||
|
||||
// Header
|
||||
*logp << "digraph v3graph {\n";
|
||||
@ -2166,7 +2166,7 @@ void ThreadSchedule::dumpDotFile(const string& filename) const {
|
||||
|
||||
// Find minimum cost MTask for scaling MTask node widths
|
||||
uint32_t minCost = UINT32_MAX;
|
||||
for (const V3GraphVertex* vxp = depGraph->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
|
||||
for (const V3GraphVertex* vxp = graph.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
|
||||
if (const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp)) {
|
||||
minCost = minCost > mtaskp->cost() ? mtaskp->cost() : minCost;
|
||||
}
|
||||
@ -2189,13 +2189,13 @@ void ThreadSchedule::dumpDotFile(const string& filename) const {
|
||||
};
|
||||
|
||||
// Emit MTasks
|
||||
for (const V3GraphVertex* vxp = depGraph->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
|
||||
for (const V3GraphVertex* vxp = graph.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
|
||||
if (const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp)) emitMTask(mtaskp);
|
||||
}
|
||||
|
||||
// Emit MTask dependency edges
|
||||
*logp << "\n // MTask dependencies\n";
|
||||
for (const V3GraphVertex* vxp = depGraph->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
|
||||
for (const V3GraphVertex* vxp = graph.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
|
||||
if (const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp)) {
|
||||
for (V3GraphEdge* edgep = mtaskp->outBeginp(); edgep; edgep = edgep->outNextp()) {
|
||||
const V3GraphVertex* const top = edgep->top();
|
||||
@ -2382,7 +2382,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
if (debug() >= 4) schedule.dumpDotFilePrefixedAlways("schedule");
|
||||
if (debug() >= 4) schedule.dumpDotFilePrefixedAlways(mtaskGraph, "schedule");
|
||||
|
||||
return schedule;
|
||||
}
|
||||
@ -2659,15 +2659,14 @@ void V3Partition::go(V3Graph* mtasksp) {
|
||||
LogicMTask* const mtaskp = dynamic_cast<LogicMTask*>(itp);
|
||||
sorted.insert(mtaskp);
|
||||
}
|
||||
uint32_t nextId = 1;
|
||||
for (auto it = sorted.begin(); it != sorted.end(); ++it) {
|
||||
// We shouldn't perturb the sort order of the set, despite
|
||||
// changing the IDs, they should all just remain in the same
|
||||
// relative order. Confirm that:
|
||||
const uint32_t nextId = v3Global.rootp()->allocNextMTaskID();
|
||||
UASSERT(nextId <= (*it)->id(), "Should only shrink MTaskIDs here");
|
||||
UINFO(4, "Reassigning MTask id " << (*it)->id() << " to id " << nextId << "\n");
|
||||
(*it)->id(nextId);
|
||||
++nextId;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2868,11 +2867,8 @@ static void finalizeCosts(V3Graph* execMTaskGraphp) {
|
||||
}
|
||||
|
||||
// Assign profiler IDs
|
||||
uint64_t profilerId = 0;
|
||||
for (const V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp;
|
||||
vxp = vxp->verticesNextp()) {
|
||||
ExecMTask* const mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
|
||||
mtp->profilerId(profilerId++);
|
||||
for (V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
|
||||
static_cast<ExecMTask*>(vxp)->profilerId(v3Global.rootp()->allocNextMTaskProfilingID());
|
||||
}
|
||||
|
||||
// Removing tasks may cause edges that were formerly non-transitive to
|
||||
@ -2961,7 +2957,8 @@ static void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t th
|
||||
}
|
||||
}
|
||||
|
||||
static const std::vector<AstCFunc*> createThreadFunctions(const ThreadSchedule& schedule) {
|
||||
static const std::vector<AstCFunc*> createThreadFunctions(const ThreadSchedule& schedule,
|
||||
const string& tag) {
|
||||
AstNodeModule* const modp = v3Global.rootp()->topModulep();
|
||||
FileLine* const fl = modp->fileline();
|
||||
|
||||
@ -2971,8 +2968,7 @@ static const std::vector<AstCFunc*> createThreadFunctions(const ThreadSchedule&
|
||||
for (const std::vector<const ExecMTask*>& thread : schedule.threads) {
|
||||
if (thread.empty()) continue;
|
||||
const uint32_t threadId = schedule.threadId(thread.front());
|
||||
string name = "__Vthread_";
|
||||
name += cvtToStr(threadId);
|
||||
const string name{"__Vthread__" + tag + "__" + cvtToStr(threadId)};
|
||||
AstCFunc* const funcp = new AstCFunc(fl, name, nullptr, "void");
|
||||
modp->addStmtp(funcp);
|
||||
funcps.push_back(funcp);
|
||||
@ -3048,32 +3044,31 @@ static void implementExecGraph(AstExecGraph* const execGraphp) {
|
||||
|
||||
// Schedule the mtasks: statically associate each mtask with a thread,
|
||||
// and determine the order in which each thread will run its mtasks.
|
||||
const ThreadSchedule& schedule = PartPackMTasks().pack(*execGraphp->mutableDepGraphp());
|
||||
const ThreadSchedule& schedule = PartPackMTasks().pack(*execGraphp->depGraphp());
|
||||
|
||||
// Create a function to be run by each thread. Note this moves all AstMTaskBody nodes from the
|
||||
// AstExecGraph into the AstCFunc created
|
||||
const std::vector<AstCFunc*>& funcps = createThreadFunctions(schedule);
|
||||
const std::vector<AstCFunc*>& funcps = createThreadFunctions(schedule, execGraphp->name());
|
||||
UASSERT(!funcps.empty(), "Non-empty ExecGraph yields no threads?");
|
||||
|
||||
// Start the thread functions at the point this AstExecGraph is located in the tree.
|
||||
addThreadStartToExecGraph(execGraphp, funcps);
|
||||
}
|
||||
|
||||
void V3Partition::finalize() {
|
||||
void V3Partition::finalize(AstNetlist* netlistp) {
|
||||
// Called by Verilator top stage
|
||||
AstExecGraph* const execGraphp = v3Global.rootp()->execGraphp();
|
||||
UASSERT(execGraphp, "Couldn't find AstExecGraph singleton.");
|
||||
|
||||
netlistp->topModulep()->foreach<AstExecGraph>([&](AstExecGraph* execGraphp) {
|
||||
// Back in V3Order, we partitioned mtasks using provisional cost
|
||||
// estimates. However, V3Order precedes some optimizations (notably
|
||||
// V3LifePost) that can change the cost of logic within each mtask.
|
||||
// Now that logic is final, recompute the cost and priority of each
|
||||
// ExecMTask.
|
||||
fillinCosts(execGraphp->mutableDepGraphp());
|
||||
finalizeCosts(execGraphp->mutableDepGraphp());
|
||||
fillinCosts(execGraphp->depGraphp());
|
||||
finalizeCosts(execGraphp->depGraphp());
|
||||
|
||||
// Replace the graph body with its multi-threaded implementation.
|
||||
implementExecGraph(execGraphp);
|
||||
});
|
||||
}
|
||||
|
||||
void V3Partition::selfTest() {
|
||||
|
@ -62,7 +62,7 @@ public:
|
||||
|
||||
// Operate on the final ExecMTask graph, immediately prior to code
|
||||
// generation time.
|
||||
static void finalize();
|
||||
static void finalize(AstNetlist* netlistp);
|
||||
|
||||
private:
|
||||
static void setupMTaskDeps(V3Graph* mtasksp, const Vx2MTaskMap* vx2mtaskp);
|
||||
|
@ -503,7 +503,7 @@ static void process() {
|
||||
// threads. Must happen pre-EmitC which relies on the packing
|
||||
// order. Must happen post-V3LifePost which changes the relative
|
||||
// costs of mtasks.
|
||||
V3Partition::finalize();
|
||||
V3Partition::finalize(v3Global.rootp());
|
||||
}
|
||||
|
||||
if (!v3Global.opt.lintOnly() && !v3Global.opt.xmlOnly() && !v3Global.opt.dpiHdrOnly()) {
|
||||
|
Loading…
Reference in New Issue
Block a user