Improve performance of V3VariableOrder with parallelization (#5406)

This commit is contained in:
Bartłomiej Chmiel 2024-09-06 14:04:26 +02:00 committed by GitHub
parent 1c38744fd1
commit d6923c8571
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 99 additions and 78 deletions

View File

@ -210,6 +210,7 @@ RAW_OBJS_PCH_ASTMT = \
V3Options.o \
V3Stats.o \
V3StatsReport.o \
V3VariableOrder.o \
RAW_OBJS_PCH_ASTNOMT = \
V3Active.o \
@ -309,7 +310,6 @@ RAW_OBJS_PCH_ASTNOMT = \
V3Undriven.o \
V3Unknown.o \
V3Unroll.o \
V3VariableOrder.o \
V3Width.o \
V3WidthCommit.o \
V3WidthSel.o \

View File

@ -2027,7 +2027,7 @@ public:
bool isRef() const VL_MT_SAFE { return m_direction.isRef(); }
bool isWritable() const VL_MT_SAFE { return m_direction.isWritable(); }
bool isTristate() const { return m_tristate; }
bool isPrimaryIO() const { return m_primaryIO; }
bool isPrimaryIO() const VL_MT_SAFE { return m_primaryIO; }
bool isPrimaryInish() const { return isPrimaryIO() && isNonOutput(); }
bool isIfaceRef() const { return (varType() == VVarType::IFACEREF); }
bool isIfaceParent() const { return m_isIfaceParent; }
@ -2051,15 +2051,15 @@ public:
AstBasicDType* bdtypep = basicp();
return bdtypep && bdtypep->isBitLogic();
}
bool isUsedClock() const { return m_usedClock; }
bool isUsedClock() const VL_MT_SAFE { return m_usedClock; }
bool isUsedParam() const { return m_usedParam; }
bool isUsedLoopIdx() const { return m_usedLoopIdx; }
bool isSc() const VL_MT_SAFE { return m_sc; }
bool isScQuad() const;
bool isScBv() const;
bool isScBv() const VL_MT_STABLE;
bool isScUint() const;
bool isScUintBool() const;
bool isScBigUint() const;
bool isScBigUint() const VL_MT_STABLE;
bool isScSensitive() const { return m_scSensitive; }
bool isSigPublic() const;
bool isSigModPublic() const { return m_sigModPublic; }

View File

@ -67,7 +67,7 @@ public:
}
// Return C++ class name for a module/class object
static string prefixNameProtect(const AstNode* nodep) VL_MT_STABLE;
static bool isAnonOk(const AstVar* varp) {
static bool isAnonOk(const AstVar* varp) VL_MT_STABLE {
AstNodeDType* const dtp = varp->dtypep()->skipRefp();
return v3Global.opt.compLimitMembers() != 0 // Enabled
&& !varp->isStatic() // Not a static variable

View File

@ -657,7 +657,7 @@ void v3errorEndFatal(std::ostringstream& sstr)
// Takes an optional "name" (as __VA_ARGS__)
#define VL_DEFINE_DUMP(func, tag) \
VL_ATTR_UNUSED static int dump##func() { \
VL_ATTR_UNUSED static int dump##func() VL_MT_SAFE { \
static int level = -1; \
if (VL_UNLIKELY(level < 0)) { \
const unsigned dumpTag = v3Global.opt.dumpLevel(tag); \

View File

@ -48,7 +48,7 @@ void ExecMTask::dump(std::ostream& str) const {
if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]";
}
uint32_t ExecMTask::s_nextId = 0;
std::atomic<uint32_t> ExecMTask::s_nextId{0};
namespace V3ExecGraph {

View File

@ -22,6 +22,8 @@
#include "V3Graph.h"
#include <atomic>
class AstNetlist;
class AstMTaskBody;
@ -33,7 +35,7 @@ class ExecMTask final : public V3GraphVertex {
private:
AstMTaskBody* const m_bodyp; // Task body
const uint32_t m_id; // Unique ID of this ExecMTask.
static uint32_t s_nextId; // Next ID to use
static std::atomic<uint32_t> s_nextId; // Next ID to use
const std::string m_hashName; // Hashed name based on body for profile-driven optimization
// Predicted critical path from the start of this mtask to the ends of the graph that are
// reachable from this mtask. In abstract time units.
@ -57,7 +59,7 @@ public:
string hashName() const { return m_hashName; }
void dump(std::ostream& str) const;
static uint32_t numUsedIds() { return s_nextId; }
static uint32_t numUsedIds() VL_MT_SAFE { return s_nextId; }
};
inline std::ostream& operator<<(std::ostream& os, const ExecMTask& rhs) {

View File

@ -2032,7 +2032,7 @@ unsigned V3Options::dumpLevel(const string& tag) const VL_MT_SAFE {
return iter != m_dumpLevel.end() ? iter->second : 0;
}
unsigned V3Options::dumpSrcLevel(const string& srcfile_path) const {
unsigned V3Options::dumpSrcLevel(const string& srcfile_path) const VL_MT_SAFE {
// For simplicity, calling functions can just use __FILE__ for srcfile.
// That means we need to strip the filenames: ../Foo.cpp -> Foo
return dumpLevel(V3Os::filenameNonDirExt(srcfile_path));

View File

@ -432,7 +432,7 @@ public:
unsigned debugLevel(const string& tag) const VL_MT_SAFE;
unsigned debugSrcLevel(const string& srcfile_path) const VL_MT_SAFE;
unsigned dumpLevel(const string& tag) const VL_MT_SAFE;
unsigned dumpSrcLevel(const string& srcfile_path) const;
unsigned dumpSrcLevel(const string& srcfile_path) const VL_MT_SAFE;
// METHODS
void addCppFile(const string& filename);
@ -520,7 +520,7 @@ public:
bool pinsInoutEnables() const { return m_pinsInoutEnables; }
bool pinsScUint() const { return m_pinsScUint; }
bool pinsScUintBool() const { return m_pinsScUintBool; }
bool pinsScBigUint() const { return m_pinsScBigUint; }
bool pinsScBigUint() const VL_MT_SAFE { return m_pinsScBigUint; }
bool pinsUint8() const { return m_pinsUint8; }
bool ppComments() const { return m_ppComments; }
bool profC() const { return m_profC; }
@ -563,14 +563,14 @@ public:
int outputSplit() const { return m_outputSplit; }
int outputSplitCFuncs() const { return m_outputSplitCFuncs; }
int outputSplitCTrace() const { return m_outputSplitCTrace; }
int pinsBv() const { return m_pinsBv; }
int pinsBv() const VL_MT_SAFE { return m_pinsBv; }
int publicDepth() const { return m_publicDepth; }
int reloopLimit() const { return m_reloopLimit; }
VOptionBool skipIdentical() const { return m_skipIdentical; }
bool stopFail() const { return m_stopFail; }
int threads() const VL_MT_SAFE { return m_threads; }
int threadsMaxMTasks() const { return m_threadsMaxMTasks; }
bool mtasks() const { return (m_threads > 1); }
bool mtasks() const VL_MT_SAFE { return (m_threads > 1); }
VTimescale timeDefaultPrec() const { return m_timeDefaultPrec; }
VTimescale timeDefaultUnit() const { return m_timeDefaultUnit; }
VTimescale timeOverridePrec() const { return m_timeOverridePrec; }
@ -596,7 +596,7 @@ public:
int verilateJobs() const { return m_verilateJobs; }
int compLimitBlocks() const { return m_compLimitBlocks; }
int compLimitMembers() const { return m_compLimitMembers; }
int compLimitMembers() const VL_MT_SAFE { return m_compLimitMembers; }
int compLimitParens() const { return m_compLimitParens; }
string exeName() const { return m_exeName != "" ? m_exeName : prefix(); }
@ -693,7 +693,7 @@ public:
}
bool hierarchical() const { return m_hierarchical; }
int hierChild() const { return m_hierChild; }
int hierChild() const VL_MT_SAFE { return m_hierChild; }
bool hierTop() const VL_MT_SAFE { return !m_hierChild && !m_hierBlocks.empty(); }
const V3HierBlockOptSet& hierBlocks() const { return m_hierBlocks; }
// Directory to save .tree, .dot, .dat, .vpp for hierarchical block top

View File

@ -433,7 +433,7 @@ private:
//######################################################################
// Main algorithm
void V3TSP::tspSort(const V3TSP::StateVec& states, V3TSP::StateVec* resultp) {
void V3TSP::tspSort(const V3TSP::StateVec& states, V3TSP::StateVec* resultp) VL_MT_SAFE {
UASSERT(resultp->empty(), "Output graph must start empty");
// Make this TSP implementation work for graphs of size 0 or 1
@ -536,14 +536,14 @@ public:
, m_ypos{ypos}
, m_serial{++s_serialNext} {}
~TspTestState() override = default;
int cost(const TspStateBase* otherp) const override {
int cost(const TspStateBase* otherp) const override VL_MT_SAFE {
return cost(dynamic_cast<const TspTestState*>(otherp));
}
static unsigned diff(unsigned a, unsigned b) {
static unsigned diff(unsigned a, unsigned b) VL_PURE {
if (a > b) return a - b;
return b - a;
}
virtual int cost(const TspTestState* otherp) const {
int cost(const TspTestState* otherp) const VL_PURE {
// For test purposes, each TspTestState is merely a point
// on the Cartesian plane; cost is the linear distance
// between two points.

View File

@ -34,7 +34,7 @@ public:
// This is the cost function that the TSP sort will minimize.
// All costs in V3TSP are int, chosen to match the type of
// V3GraphEdge::weight() which will reflect each edge's cost.
virtual int cost(const TspStateBase* otherp) const = 0;
virtual int cost(const TspStateBase* otherp) const VL_MT_SAFE = 0;
// This operator< must place a meaningless, arbitrary, but
// stable order on all TspStateBase's. It's used only to
@ -49,7 +49,7 @@ using StateVec = std::vector<const TspStateBase*>;
// Given an unsorted set of TspState's, sort them to minimize
// the transition cost for walking the sorted list.
void tspSort(const StateVec& states, StateVec* resultp) VL_MT_DISABLED;
void tspSort(const StateVec& states, StateVec* resultp) VL_MT_SAFE;
void selfTest() VL_MT_DISABLED;
} // namespace V3TSP

View File

@ -20,7 +20,7 @@
//
//*************************************************************************
#include "V3PchAstNoMT.h" // VL_MT_DISABLED_CODE_UNIT
#include "V3PchAstMT.h"
#include "V3VariableOrder.h"
@ -28,6 +28,7 @@
#include "V3EmitCBase.h"
#include "V3ExecGraph.h"
#include "V3TSP.h"
#include "V3ThreadPool.h"
#include <vector>
@ -113,10 +114,10 @@ public:
}
bool operator<(const VarTspSorter& other) const { return m_serial < other.m_serial; }
const MTaskIdVec& mTaskIds() const { return m_mTaskIds; }
int cost(const TspStateBase* otherp) const override {
int cost(const TspStateBase* otherp) const override VL_MT_SAFE {
return cost(static_cast<const VarTspSorter*>(otherp));
}
int cost(const VarTspSorter* otherp) const {
int cost(const VarTspSorter* otherp) const VL_MT_SAFE {
// Compute the number of MTasks not shared (Hamming distance)
int cost = 0;
const size_t size = ExecMTask::numUsedIds();
@ -127,22 +128,20 @@ public:
uint32_t VarTspSorter::s_serialNext = 0;
struct VarAttributes final {
uint8_t stratum; // Roughly equivalent to alignment requirement, to avoid padding
bool anonOk; // Can be emitted as part of anonymous structure
};
class VariableOrder final {
// NODE STATE
// AstVar::user1() -> attributes, via m_attributes
const VNUser1InUse m_user1InUse; // AstVar
struct VarAttributes final {
uint32_t stratum; // Roughly equivalent to alignment requirement, to avoid padding
bool anonOk; // Can be emitted as part of anonymous structure
};
AstUser1Allocator<AstVar, VarAttributes> m_attributes; // Attributes used for sorting
std::unordered_map<const AstVar*, VarAttributes> m_attributes;
const MTaskAffinityMap& m_mTaskAffinity;
std::vector<AstVar*>& m_varps;
VariableOrder(AstNodeModule* modp, const MTaskAffinityMap& mTaskAffinity)
: m_mTaskAffinity{mTaskAffinity} {
VariableOrder(AstNodeModule* modp, const MTaskAffinityMap& mTaskAffinity,
std::vector<AstVar*>& varps)
: m_mTaskAffinity{mTaskAffinity}
, m_varps{varps} {
orderModuleVars(modp);
}
~VariableOrder() = default;
@ -157,8 +156,11 @@ class VariableOrder final {
if (ap->isStatic() != bp->isStatic()) { // Non-statics before statics
return bp->isStatic();
}
const auto& attrA = m_attributes(ap);
const auto& attrB = m_attributes(bp);
UASSERT(m_attributes.find(ap) != m_attributes.end()
&& m_attributes.find(bp) != m_attributes.end(),
"m_attributes should be populated for each AstVar");
const auto& attrA = m_attributes.at(ap);
const auto& attrB = m_attributes.at(bp);
if (attrA.anonOk != attrB.anonOk) { // Anons before non-anons
return attrA.anonOk;
}
@ -209,58 +211,42 @@ class VariableOrder final {
}
void orderModuleVars(AstNodeModule* modp) {
std::vector<AstVar*> varps;
// Unlink all module variables from the module, compute attributes
for (AstNode *nodep = modp->stmtsp(), *nextp; nodep; nodep = nextp) {
nextp = nodep->nextp();
if (AstVar* const varp = VN_CAST(nodep, Var)) {
// Unlink, add to vector
varp->unlinkFrBack();
varps.push_back(varp);
m_varps.push_back(varp);
// Compute attributes up front
auto& attributes = m_attributes(varp);
// Stratum
const int sigbytes = varp->dtypeSkipRefp()->widthAlignBytes();
attributes.stratum = (v3Global.opt.hierChild() && varp->isPrimaryIO()) ? 0
: (varp->isUsedClock() && varp->widthMin() == 1) ? 1
: VN_IS(varp->dtypeSkipRefp(), UnpackArrayDType) ? 9
: (varp->basicp() && varp->basicp()->isOpaque()) ? 8
: (varp->isScBv() || varp->isScBigUint()) ? 7
: (sigbytes == 8) ? 6
: (sigbytes == 4) ? 5
: (sigbytes == 2) ? 3
: (sigbytes == 1) ? 2
: 10;
// Anonymous structure ok
attributes.anonOk = EmitCBase::isAnonOk(varp);
const uint8_t stratum = (v3Global.opt.hierChild() && varp->isPrimaryIO()) ? 0
: (varp->isUsedClock() && varp->widthMin() == 1) ? 1
: VN_IS(varp->dtypeSkipRefp(), UnpackArrayDType) ? 9
: (varp->basicp() && varp->basicp()->isOpaque()) ? 8
: (varp->isScBv() || varp->isScBigUint()) ? 7
: (sigbytes == 8) ? 6
: (sigbytes == 4) ? 5
: (sigbytes == 2) ? 3
: (sigbytes == 1) ? 2
: 10;
m_attributes.emplace(varp, VarAttributes{stratum, EmitCBase::isAnonOk(varp)});
}
}
if (!varps.empty()) {
// Sort variables
if (!m_varps.empty()) {
if (!v3Global.opt.mtasks()) {
simpleSortVars(varps);
simpleSortVars(m_varps);
} else {
tspSortVars(varps);
tspSortVars(m_varps);
}
// Insert them back under the module, in the new order, but at
// the front of the list so they come out first in dumps/XML.
auto it = varps.cbegin();
AstVar* const firstp = *it++;
for (; it != varps.cend(); ++it) firstp->addNext(*it);
if (AstNode* const stmtsp = modp->stmtsp()) {
stmtsp->unlinkFrBackWithNext();
AstNode::addNext<AstNode, AstNode>(firstp, stmtsp);
}
modp->addStmtsp(firstp);
}
}
public:
static void processModule(AstNodeModule* modp, const MTaskAffinityMap& mTaskAffinity) {
VariableOrder{modp, mTaskAffinity};
static void processModule(AstNodeModule* modp, const MTaskAffinityMap& mTaskAffinity,
std::vector<AstVar*>& varps) VL_MT_STABLE {
VariableOrder{modp, mTaskAffinity, varps};
}
};
@ -280,11 +266,44 @@ void V3VariableOrder::orderAll(AstNetlist* netlistp) {
}
});
}
if (v3Global.opt.stats()) V3Stats::statsStage("variableorder-gather");
// Order variables in each module
// Sort variables for each module
std::unordered_map<AstNodeModule*, std::vector<AstVar*>> sortedVars;
{
V3ThreadScope threadScope;
for (AstNodeModule* modp = v3Global.rootp()->modulesp(); modp;
modp = VN_AS(modp->nextp(), NodeModule)) {
std::vector<AstVar*>& varps = sortedVars[modp];
threadScope.enqueue([modp, mTaskAffinity, &varps]() {
VariableOrder::processModule(modp, mTaskAffinity, varps);
});
}
}
if (v3Global.opt.stats()) V3Stats::statsStage("variableorder-sort");
// Insert them back under the module, in the new order, but at
// the front of the list so they come out first in dumps/XML.
for (AstNodeModule* modp = v3Global.rootp()->modulesp(); modp;
modp = VN_AS(modp->nextp(), NodeModule)) {
VariableOrder::processModule(modp, mTaskAffinity);
const std::vector<AstVar*>& varps = sortedVars[modp];
if (!varps.empty()) {
auto it = varps.cbegin();
AstVar* const firstp = *it++;
firstp->unlinkFrBack();
for (; it != varps.cend(); ++it) {
AstVar* const varp = *it;
varp->unlinkFrBack();
firstp->addNext(varp);
}
if (AstNode* const stmtsp = modp->stmtsp()) {
stmtsp->unlinkFrBackWithNext();
AstNode::addNext<AstNode, AstNode>(firstp, stmtsp);
}
modp->addStmtsp(firstp);
}
}
// Done

View File

@ -26,7 +26,7 @@ class AstNetlist;
class V3VariableOrder final {
public:
static void orderAll(AstNetlist*) VL_MT_DISABLED;
static void orderAll(AstNetlist*);
};
#endif // Guard