mirror of
https://github.com/verilator/verilator.git
synced 2025-04-05 04:02:37 +00:00
Compute MTask affinity in V3VariableOrder (#4991)
Instead of carrying around MTask affinity from scheduling, compute it in V3VariableOrder (where it is used), by tracing through the code. This simplifies some code and has the benefit of handling variables introduced after scheduling. It's worth a few % speed at run-time, and the new implementation of V3VariableOrder is slightly more efficient, though the speed/space is still dominated by the TSP sort.
This commit is contained in:
parent
e8a9662eb5
commit
292cc54768
@ -49,9 +49,6 @@ class VFlagLogicPacked {};
|
||||
class VFlagBitPacked {};
|
||||
class VFlagChildDType {}; // Used by parser.y to select constructor that sets childDType
|
||||
|
||||
// Used as key for another map, needs operator<, hence not an unordered_set
|
||||
using MTaskIdSet = std::set<int>; // Set of mtaskIds for Var sorting
|
||||
|
||||
//######################################################################
|
||||
|
||||
// For broken() function, return error string if have a match
|
||||
|
@ -1742,7 +1742,6 @@ class AstVar final : public AstNode {
|
||||
VDirection m_declDirection; // Declared direction input/output etc
|
||||
VLifetime m_lifetime; // Lifetime
|
||||
VVarAttrClocker m_attrClocker;
|
||||
MTaskIdSet m_mtaskIds; // MTaskID's that read or write this var
|
||||
int m_pinNum = 0; // For XML, if non-zero the connection pin number
|
||||
bool m_ansi : 1; // Params or pins declared in the module header, rather than the body
|
||||
bool m_declTyped : 1; // Declared as type (for dedup check)
|
||||
@ -2096,8 +2095,6 @@ public:
|
||||
m_name = name;
|
||||
}
|
||||
static AstVar* scVarRecurse(AstNode* nodep);
|
||||
void addMTaskId(int id) { m_mtaskIds.insert(id); }
|
||||
const MTaskIdSet& mtaskIds() const { return m_mtaskIds; }
|
||||
void pinNum(int id) { m_pinNum = id; }
|
||||
int pinNum() const { return m_pinNum; }
|
||||
};
|
||||
|
@ -2464,22 +2464,6 @@ AstExecGraph* V3Order::createParallel(const OrderGraph& orderGraph, const std::s
|
||||
|
||||
// Add this logic to the per-mtask order
|
||||
mtaskStates[mtaskId].m_logics.push_back(movep->logicp());
|
||||
|
||||
// Since we happen to be iterating over every logic node,
|
||||
// take this opportunity to annotate each AstVar with the id's
|
||||
// of mTaskGraphp that consume it and produce it. We'll use this
|
||||
// information in V3EmitC when we lay out var's in memory.
|
||||
const OrderLogicVertex* const logicp = movep->logicp();
|
||||
for (const V3GraphEdge* edgep = logicp->inBeginp(); edgep; edgep = edgep->inNextp()) {
|
||||
const OrderVarVertex* const vVtxp = edgep->fromp()->cast<const OrderVarVertex>();
|
||||
if (!vVtxp) continue;
|
||||
vVtxp->vscp()->varp()->addMTaskId(mtaskId);
|
||||
}
|
||||
for (const V3GraphEdge* edgep = logicp->outBeginp(); edgep; edgep = edgep->outNextp()) {
|
||||
const OrderVarVertex* const vVtxp = edgep->top()->cast<const OrderVarVertex>();
|
||||
if (!vVtxp) continue;
|
||||
vVtxp->vscp()->varp()->addMTaskId(mtaskId);
|
||||
}
|
||||
}
|
||||
|
||||
// Create the AstExecGraph node which represents the execution
|
||||
|
@ -26,25 +26,85 @@
|
||||
|
||||
#include "V3AstUserAllocator.h"
|
||||
#include "V3EmitCBase.h"
|
||||
#include "V3ExecGraph.h"
|
||||
#include "V3TSP.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
VL_DEFINE_DEBUG_FUNCTIONS;
|
||||
|
||||
using MTaskIdVec = std::vector<bool>; // Used as a bit-set indexed by MTask ID
|
||||
using MTaskAffinityMap = std::unordered_map<const AstVar*, MTaskIdVec>;
|
||||
|
||||
// Trace through code reachable form an MTask and annotate referenced variabels
|
||||
class GatherMTaskAffinity final : VNVisitorConst {
|
||||
// NODE STATE
|
||||
// AstCFunc::user1() // bool: Already traced this function
|
||||
// AstVar::user1() // bool: Already traced this variable
|
||||
const VNUser1InUse m_user1InUse;
|
||||
|
||||
// STATE
|
||||
MTaskAffinityMap& m_results; // The result map being built;
|
||||
const uint32_t m_id; // Id of mtask being analysed
|
||||
const size_t m_usedIds = ExecMTask::numUsedIds(); // Value of max id + 1
|
||||
|
||||
// CONSTRUCTOR
|
||||
GatherMTaskAffinity(const ExecMTask* mTaskp, MTaskAffinityMap& results)
|
||||
: m_results{results}
|
||||
, m_id{mTaskp->id()} {
|
||||
iterateChildrenConst(mTaskp->bodyp());
|
||||
}
|
||||
~GatherMTaskAffinity() = default;
|
||||
VL_UNMOVABLE(GatherMTaskAffinity);
|
||||
|
||||
// VISIT
|
||||
void visit(AstNodeVarRef* nodep) {
|
||||
// Cheaper than relying on emplace().second
|
||||
if (nodep->user1SetOnce()) return;
|
||||
AstVar* const varp = nodep->varp();
|
||||
// Ignore TriggerVec. They are big and read-only in the MTask bodies
|
||||
AstBasicDType* const basicp = varp->dtypep()->basicp();
|
||||
if (basicp && basicp->isTriggerVec()) return;
|
||||
// Set affinity bit
|
||||
MTaskIdVec& affinity = m_results
|
||||
.emplace(std::piecewise_construct, //
|
||||
std::forward_as_tuple(varp), //
|
||||
std::forward_as_tuple(m_usedIds))
|
||||
.first->second;
|
||||
affinity[m_id] = true;
|
||||
}
|
||||
|
||||
void visit(AstCFunc* nodep) {
|
||||
if (nodep->user1SetOnce()) return; // Prevent repeat traversals/recursion
|
||||
iterateChildrenConst(nodep);
|
||||
}
|
||||
|
||||
void visit(AstNodeCCall* nodep) {
|
||||
iterateChildrenConst(nodep); // Arguments
|
||||
iterateConst(nodep->funcp()); // Callee
|
||||
}
|
||||
|
||||
void visit(AstNode* nodep) { iterateChildrenConst(nodep); }
|
||||
|
||||
public:
|
||||
static void apply(const ExecMTask* mTaskp, MTaskAffinityMap& results) {
|
||||
GatherMTaskAffinity{mTaskp, results};
|
||||
}
|
||||
};
|
||||
|
||||
//######################################################################
|
||||
// Establish mtask variable sort order in mtasks mode
|
||||
|
||||
class VarTspSorter final : public V3TSP::TspStateBase {
|
||||
// MEMBERS
|
||||
const MTaskIdSet& m_mtaskIds; // Mtask we're ordering
|
||||
static unsigned s_serialNext; // Unique ID to establish serial order
|
||||
unsigned m_serial; // Serial ordering
|
||||
const MTaskIdVec& m_mTaskIds; // Mtask we're ordering
|
||||
static uint32_t s_serialNext; // Unique ID to establish serial order
|
||||
const uint32_t m_serial = ++s_serialNext; // Serial ordering
|
||||
public:
|
||||
// CONSTRUCTORS
|
||||
explicit VarTspSorter(const MTaskIdSet& mtaskIds)
|
||||
: m_mtaskIds(mtaskIds) { // Cannot be {} or GCC 4.8 false warning
|
||||
m_serial = ++s_serialNext; // Cannot be ()/{} or GCC 4.8 false warning
|
||||
explicit VarTspSorter(const MTaskIdVec& mTaskIds)
|
||||
: m_mTaskIds{mTaskIds} {
|
||||
UASSERT(mTaskIds.size() == ExecMTask::numUsedIds(), "Wrong size for MTask ID vector");
|
||||
}
|
||||
~VarTspSorter() override = default;
|
||||
// METHODS
|
||||
@ -52,26 +112,20 @@ public:
|
||||
return operator<(static_cast<const VarTspSorter&>(other));
|
||||
}
|
||||
bool operator<(const VarTspSorter& other) const { return m_serial < other.m_serial; }
|
||||
const MTaskIdSet& mtaskIds() const { return m_mtaskIds; }
|
||||
const MTaskIdVec& mTaskIds() const { return m_mTaskIds; }
|
||||
int cost(const TspStateBase* otherp) const override {
|
||||
return cost(static_cast<const VarTspSorter*>(otherp));
|
||||
}
|
||||
int cost(const VarTspSorter* otherp) const {
|
||||
int cost = diffs(m_mtaskIds, otherp->m_mtaskIds);
|
||||
cost += diffs(otherp->m_mtaskIds, m_mtaskIds);
|
||||
// Compute the number of MTasks not shared (Hamming distance)
|
||||
int cost = 0;
|
||||
const size_t size = ExecMTask::numUsedIds();
|
||||
for (size_t i = 0; i < size; ++i) { cost += m_mTaskIds.at(i) ^ otherp->m_mTaskIds.at(i); }
|
||||
return cost;
|
||||
}
|
||||
// Returns the number of elements in set_a that don't appear in set_b
|
||||
static int diffs(const MTaskIdSet& set_a, const MTaskIdSet& set_b) {
|
||||
int diffs = 0;
|
||||
for (int i : set_a) {
|
||||
if (set_b.find(i) == set_b.end()) ++diffs;
|
||||
}
|
||||
return diffs;
|
||||
}
|
||||
};
|
||||
|
||||
unsigned VarTspSorter::s_serialNext = 0;
|
||||
uint32_t VarTspSorter::s_serialNext = 0;
|
||||
|
||||
class VariableOrder final {
|
||||
// NODE STATE
|
||||
@ -85,6 +139,15 @@ class VariableOrder final {
|
||||
|
||||
AstUser1Allocator<AstVar, VarAttributes> m_attributes; // Attributes used for sorting
|
||||
|
||||
const MTaskAffinityMap& m_mTaskAffinity;
|
||||
|
||||
VariableOrder(AstNodeModule* modp, const MTaskAffinityMap& mTaskAffinity)
|
||||
: m_mTaskAffinity{mTaskAffinity} {
|
||||
orderModuleVars(modp);
|
||||
}
|
||||
~VariableOrder() = default;
|
||||
VL_UNCOPYABLE(VariableOrder);
|
||||
|
||||
//######################################################################
|
||||
|
||||
// Simple sort
|
||||
@ -106,14 +169,20 @@ class VariableOrder final {
|
||||
// Sort by MTask-affinity first, then the same as simpleSortVars
|
||||
void tspSortVars(std::vector<AstVar*>& varps) {
|
||||
// Map from "MTask affinity" -> "variable list"
|
||||
std::map<const MTaskIdSet, std::vector<AstVar*>> m2v;
|
||||
for (AstVar* const varp : varps) m2v[varp->mtaskIds()].push_back(varp);
|
||||
std::map<const MTaskIdVec, std::vector<AstVar*>> m2v;
|
||||
const MTaskIdVec emptyVec(ExecMTask::numUsedIds(), false);
|
||||
for (AstVar* const varp : varps) {
|
||||
const auto it = m_mTaskAffinity.find(varp);
|
||||
const MTaskIdVec& key = it == m_mTaskAffinity.end() ? emptyVec : it->second;
|
||||
m2v[key].push_back(varp);
|
||||
}
|
||||
|
||||
// Create a TSP sort state for each unique MTask affinity set, except for the empty set
|
||||
V3TSP::StateVec states;
|
||||
for (const auto& pair : m2v) {
|
||||
if (pair.first.empty()) continue;
|
||||
states.push_back(new VarTspSorter{pair.first});
|
||||
const MTaskIdVec& vec = pair.first;
|
||||
const bool empty = std::find(vec.begin(), vec.end(), true) == vec.end();
|
||||
if (!empty) states.push_back(new VarTspSorter{vec});
|
||||
}
|
||||
|
||||
// Do the TSP sort
|
||||
@ -131,12 +200,12 @@ class VariableOrder final {
|
||||
// Enumerate by sorted MTask affinity set, sort within each set separately
|
||||
for (const V3TSP::TspStateBase* const stateBasep : sortedStates) {
|
||||
const VarTspSorter* const statep = dynamic_cast<const VarTspSorter*>(stateBasep);
|
||||
sortAndAppend(m2v[statep->mtaskIds()]);
|
||||
sortAndAppend(m2v[statep->mTaskIds()]);
|
||||
VL_DO_DANGLING(delete statep, statep);
|
||||
}
|
||||
|
||||
// Finally add the variables with no known MTask affinity
|
||||
sortAndAppend(m2v[MTaskIdSet()]);
|
||||
sortAndAppend(m2v[emptyVec]);
|
||||
}
|
||||
|
||||
void orderModuleVars(AstNodeModule* modp) {
|
||||
@ -190,17 +259,35 @@ class VariableOrder final {
|
||||
}
|
||||
|
||||
public:
|
||||
static void processModule(AstNodeModule* modp) { VariableOrder{}.orderModuleVars(modp); }
|
||||
static void processModule(AstNodeModule* modp, const MTaskAffinityMap& mTaskAffinity) {
|
||||
VariableOrder{modp, mTaskAffinity};
|
||||
}
|
||||
};
|
||||
|
||||
//######################################################################
|
||||
// V3VariableOrder static functions
|
||||
|
||||
void V3VariableOrder::orderAll() {
|
||||
void V3VariableOrder::orderAll(AstNetlist* netlistp) {
|
||||
UINFO(2, __FUNCTION__ << ": " << endl);
|
||||
|
||||
MTaskAffinityMap mTaskAffinity;
|
||||
|
||||
// Gather MTask affinities
|
||||
if (v3Global.opt.mtasks()) {
|
||||
netlistp->topModulep()->foreach([&](AstExecGraph* execGraphp) {
|
||||
for (const V3GraphVertex* vtxp = execGraphp->depGraphp()->verticesBeginp(); vtxp;
|
||||
vtxp = vtxp->verticesNextp()) {
|
||||
GatherMTaskAffinity::apply(vtxp->as<const ExecMTask>(), mTaskAffinity);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Order variables in each module
|
||||
for (AstNodeModule* modp = v3Global.rootp()->modulesp(); modp;
|
||||
modp = VN_AS(modp->nextp(), NodeModule)) {
|
||||
VariableOrder::processModule(modp);
|
||||
VariableOrder::processModule(modp, mTaskAffinity);
|
||||
}
|
||||
|
||||
// Done
|
||||
V3Global::dumpCheckGlobalTree("variableorder", 0, dumpTreeEitherLevel() >= 3);
|
||||
}
|
||||
|
@ -22,11 +22,13 @@
|
||||
|
||||
#include "V3ThreadSafety.h"
|
||||
|
||||
class AstNetlist;
|
||||
|
||||
//============================================================================
|
||||
|
||||
class V3VariableOrder final {
|
||||
public:
|
||||
static void orderAll() VL_MT_DISABLED;
|
||||
static void orderAll(AstNetlist*) VL_MT_DISABLED;
|
||||
};
|
||||
|
||||
#endif // Guard
|
||||
|
@ -563,7 +563,7 @@ static void process() {
|
||||
V3Common::commonAll();
|
||||
|
||||
// Order variables
|
||||
V3VariableOrder::orderAll();
|
||||
V3VariableOrder::orderAll(v3Global.rootp());
|
||||
|
||||
// Create AstCUse to determine what class forward declarations/#includes needed in C
|
||||
V3CUse::cUseAll();
|
||||
|
Loading…
Reference in New Issue
Block a user