mirror of
https://github.com/verilator/verilator.git
synced 2025-04-05 04:02:37 +00:00
Improve performance of V3VariableOrder with parallelization (#5406)
This commit is contained in:
parent
1c38744fd1
commit
d6923c8571
@ -210,6 +210,7 @@ RAW_OBJS_PCH_ASTMT = \
|
||||
V3Options.o \
|
||||
V3Stats.o \
|
||||
V3StatsReport.o \
|
||||
V3VariableOrder.o \
|
||||
|
||||
RAW_OBJS_PCH_ASTNOMT = \
|
||||
V3Active.o \
|
||||
@ -309,7 +310,6 @@ RAW_OBJS_PCH_ASTNOMT = \
|
||||
V3Undriven.o \
|
||||
V3Unknown.o \
|
||||
V3Unroll.o \
|
||||
V3VariableOrder.o \
|
||||
V3Width.o \
|
||||
V3WidthCommit.o \
|
||||
V3WidthSel.o \
|
||||
|
@ -2027,7 +2027,7 @@ public:
|
||||
bool isRef() const VL_MT_SAFE { return m_direction.isRef(); }
|
||||
bool isWritable() const VL_MT_SAFE { return m_direction.isWritable(); }
|
||||
bool isTristate() const { return m_tristate; }
|
||||
bool isPrimaryIO() const { return m_primaryIO; }
|
||||
bool isPrimaryIO() const VL_MT_SAFE { return m_primaryIO; }
|
||||
bool isPrimaryInish() const { return isPrimaryIO() && isNonOutput(); }
|
||||
bool isIfaceRef() const { return (varType() == VVarType::IFACEREF); }
|
||||
bool isIfaceParent() const { return m_isIfaceParent; }
|
||||
@ -2051,15 +2051,15 @@ public:
|
||||
AstBasicDType* bdtypep = basicp();
|
||||
return bdtypep && bdtypep->isBitLogic();
|
||||
}
|
||||
bool isUsedClock() const { return m_usedClock; }
|
||||
bool isUsedClock() const VL_MT_SAFE { return m_usedClock; }
|
||||
bool isUsedParam() const { return m_usedParam; }
|
||||
bool isUsedLoopIdx() const { return m_usedLoopIdx; }
|
||||
bool isSc() const VL_MT_SAFE { return m_sc; }
|
||||
bool isScQuad() const;
|
||||
bool isScBv() const;
|
||||
bool isScBv() const VL_MT_STABLE;
|
||||
bool isScUint() const;
|
||||
bool isScUintBool() const;
|
||||
bool isScBigUint() const;
|
||||
bool isScBigUint() const VL_MT_STABLE;
|
||||
bool isScSensitive() const { return m_scSensitive; }
|
||||
bool isSigPublic() const;
|
||||
bool isSigModPublic() const { return m_sigModPublic; }
|
||||
|
@ -67,7 +67,7 @@ public:
|
||||
}
|
||||
// Return C++ class name for a module/class object
|
||||
static string prefixNameProtect(const AstNode* nodep) VL_MT_STABLE;
|
||||
static bool isAnonOk(const AstVar* varp) {
|
||||
static bool isAnonOk(const AstVar* varp) VL_MT_STABLE {
|
||||
AstNodeDType* const dtp = varp->dtypep()->skipRefp();
|
||||
return v3Global.opt.compLimitMembers() != 0 // Enabled
|
||||
&& !varp->isStatic() // Not a static variable
|
||||
|
@ -657,7 +657,7 @@ void v3errorEndFatal(std::ostringstream& sstr)
|
||||
|
||||
// Takes an optional "name" (as __VA_ARGS__)
|
||||
#define VL_DEFINE_DUMP(func, tag) \
|
||||
VL_ATTR_UNUSED static int dump##func() { \
|
||||
VL_ATTR_UNUSED static int dump##func() VL_MT_SAFE { \
|
||||
static int level = -1; \
|
||||
if (VL_UNLIKELY(level < 0)) { \
|
||||
const unsigned dumpTag = v3Global.opt.dumpLevel(tag); \
|
||||
|
@ -48,7 +48,7 @@ void ExecMTask::dump(std::ostream& str) const {
|
||||
if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]";
|
||||
}
|
||||
|
||||
uint32_t ExecMTask::s_nextId = 0;
|
||||
std::atomic<uint32_t> ExecMTask::s_nextId{0};
|
||||
|
||||
namespace V3ExecGraph {
|
||||
|
||||
|
@ -22,6 +22,8 @@
|
||||
|
||||
#include "V3Graph.h"
|
||||
|
||||
#include <atomic>
|
||||
|
||||
class AstNetlist;
|
||||
class AstMTaskBody;
|
||||
|
||||
@ -33,7 +35,7 @@ class ExecMTask final : public V3GraphVertex {
|
||||
private:
|
||||
AstMTaskBody* const m_bodyp; // Task body
|
||||
const uint32_t m_id; // Unique ID of this ExecMTask.
|
||||
static uint32_t s_nextId; // Next ID to use
|
||||
static std::atomic<uint32_t> s_nextId; // Next ID to use
|
||||
const std::string m_hashName; // Hashed name based on body for profile-driven optimization
|
||||
// Predicted critical path from the start of this mtask to the ends of the graph that are
|
||||
// reachable from this mtask. In abstract time units.
|
||||
@ -57,7 +59,7 @@ public:
|
||||
string hashName() const { return m_hashName; }
|
||||
void dump(std::ostream& str) const;
|
||||
|
||||
static uint32_t numUsedIds() { return s_nextId; }
|
||||
static uint32_t numUsedIds() VL_MT_SAFE { return s_nextId; }
|
||||
};
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& os, const ExecMTask& rhs) {
|
||||
|
@ -2032,7 +2032,7 @@ unsigned V3Options::dumpLevel(const string& tag) const VL_MT_SAFE {
|
||||
return iter != m_dumpLevel.end() ? iter->second : 0;
|
||||
}
|
||||
|
||||
unsigned V3Options::dumpSrcLevel(const string& srcfile_path) const {
|
||||
unsigned V3Options::dumpSrcLevel(const string& srcfile_path) const VL_MT_SAFE {
|
||||
// For simplicity, calling functions can just use __FILE__ for srcfile.
|
||||
// That means we need to strip the filenames: ../Foo.cpp -> Foo
|
||||
return dumpLevel(V3Os::filenameNonDirExt(srcfile_path));
|
||||
|
@ -432,7 +432,7 @@ public:
|
||||
unsigned debugLevel(const string& tag) const VL_MT_SAFE;
|
||||
unsigned debugSrcLevel(const string& srcfile_path) const VL_MT_SAFE;
|
||||
unsigned dumpLevel(const string& tag) const VL_MT_SAFE;
|
||||
unsigned dumpSrcLevel(const string& srcfile_path) const;
|
||||
unsigned dumpSrcLevel(const string& srcfile_path) const VL_MT_SAFE;
|
||||
|
||||
// METHODS
|
||||
void addCppFile(const string& filename);
|
||||
@ -520,7 +520,7 @@ public:
|
||||
bool pinsInoutEnables() const { return m_pinsInoutEnables; }
|
||||
bool pinsScUint() const { return m_pinsScUint; }
|
||||
bool pinsScUintBool() const { return m_pinsScUintBool; }
|
||||
bool pinsScBigUint() const { return m_pinsScBigUint; }
|
||||
bool pinsScBigUint() const VL_MT_SAFE { return m_pinsScBigUint; }
|
||||
bool pinsUint8() const { return m_pinsUint8; }
|
||||
bool ppComments() const { return m_ppComments; }
|
||||
bool profC() const { return m_profC; }
|
||||
@ -563,14 +563,14 @@ public:
|
||||
int outputSplit() const { return m_outputSplit; }
|
||||
int outputSplitCFuncs() const { return m_outputSplitCFuncs; }
|
||||
int outputSplitCTrace() const { return m_outputSplitCTrace; }
|
||||
int pinsBv() const { return m_pinsBv; }
|
||||
int pinsBv() const VL_MT_SAFE { return m_pinsBv; }
|
||||
int publicDepth() const { return m_publicDepth; }
|
||||
int reloopLimit() const { return m_reloopLimit; }
|
||||
VOptionBool skipIdentical() const { return m_skipIdentical; }
|
||||
bool stopFail() const { return m_stopFail; }
|
||||
int threads() const VL_MT_SAFE { return m_threads; }
|
||||
int threadsMaxMTasks() const { return m_threadsMaxMTasks; }
|
||||
bool mtasks() const { return (m_threads > 1); }
|
||||
bool mtasks() const VL_MT_SAFE { return (m_threads > 1); }
|
||||
VTimescale timeDefaultPrec() const { return m_timeDefaultPrec; }
|
||||
VTimescale timeDefaultUnit() const { return m_timeDefaultUnit; }
|
||||
VTimescale timeOverridePrec() const { return m_timeOverridePrec; }
|
||||
@ -596,7 +596,7 @@ public:
|
||||
int verilateJobs() const { return m_verilateJobs; }
|
||||
|
||||
int compLimitBlocks() const { return m_compLimitBlocks; }
|
||||
int compLimitMembers() const { return m_compLimitMembers; }
|
||||
int compLimitMembers() const VL_MT_SAFE { return m_compLimitMembers; }
|
||||
int compLimitParens() const { return m_compLimitParens; }
|
||||
|
||||
string exeName() const { return m_exeName != "" ? m_exeName : prefix(); }
|
||||
@ -693,7 +693,7 @@ public:
|
||||
}
|
||||
|
||||
bool hierarchical() const { return m_hierarchical; }
|
||||
int hierChild() const { return m_hierChild; }
|
||||
int hierChild() const VL_MT_SAFE { return m_hierChild; }
|
||||
bool hierTop() const VL_MT_SAFE { return !m_hierChild && !m_hierBlocks.empty(); }
|
||||
const V3HierBlockOptSet& hierBlocks() const { return m_hierBlocks; }
|
||||
// Directory to save .tree, .dot, .dat, .vpp for hierarchical block top
|
||||
|
@ -433,7 +433,7 @@ private:
|
||||
//######################################################################
|
||||
// Main algorithm
|
||||
|
||||
void V3TSP::tspSort(const V3TSP::StateVec& states, V3TSP::StateVec* resultp) {
|
||||
void V3TSP::tspSort(const V3TSP::StateVec& states, V3TSP::StateVec* resultp) VL_MT_SAFE {
|
||||
UASSERT(resultp->empty(), "Output graph must start empty");
|
||||
|
||||
// Make this TSP implementation work for graphs of size 0 or 1
|
||||
@ -536,14 +536,14 @@ public:
|
||||
, m_ypos{ypos}
|
||||
, m_serial{++s_serialNext} {}
|
||||
~TspTestState() override = default;
|
||||
int cost(const TspStateBase* otherp) const override {
|
||||
int cost(const TspStateBase* otherp) const override VL_MT_SAFE {
|
||||
return cost(dynamic_cast<const TspTestState*>(otherp));
|
||||
}
|
||||
static unsigned diff(unsigned a, unsigned b) {
|
||||
static unsigned diff(unsigned a, unsigned b) VL_PURE {
|
||||
if (a > b) return a - b;
|
||||
return b - a;
|
||||
}
|
||||
virtual int cost(const TspTestState* otherp) const {
|
||||
int cost(const TspTestState* otherp) const VL_PURE {
|
||||
// For test purposes, each TspTestState is merely a point
|
||||
// on the Cartesian plane; cost is the linear distance
|
||||
// between two points.
|
||||
|
@ -34,7 +34,7 @@ public:
|
||||
// This is the cost function that the TSP sort will minimize.
|
||||
// All costs in V3TSP are int, chosen to match the type of
|
||||
// V3GraphEdge::weight() which will reflect each edge's cost.
|
||||
virtual int cost(const TspStateBase* otherp) const = 0;
|
||||
virtual int cost(const TspStateBase* otherp) const VL_MT_SAFE = 0;
|
||||
|
||||
// This operator< must place a meaningless, arbitrary, but
|
||||
// stable order on all TspStateBase's. It's used only to
|
||||
@ -49,7 +49,7 @@ using StateVec = std::vector<const TspStateBase*>;
|
||||
|
||||
// Given an unsorted set of TspState's, sort them to minimize
|
||||
// the transition cost for walking the sorted list.
|
||||
void tspSort(const StateVec& states, StateVec* resultp) VL_MT_DISABLED;
|
||||
void tspSort(const StateVec& states, StateVec* resultp) VL_MT_SAFE;
|
||||
|
||||
void selfTest() VL_MT_DISABLED;
|
||||
} // namespace V3TSP
|
||||
|
@ -20,7 +20,7 @@
|
||||
//
|
||||
//*************************************************************************
|
||||
|
||||
#include "V3PchAstNoMT.h" // VL_MT_DISABLED_CODE_UNIT
|
||||
#include "V3PchAstMT.h"
|
||||
|
||||
#include "V3VariableOrder.h"
|
||||
|
||||
@ -28,6 +28,7 @@
|
||||
#include "V3EmitCBase.h"
|
||||
#include "V3ExecGraph.h"
|
||||
#include "V3TSP.h"
|
||||
#include "V3ThreadPool.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
@ -113,10 +114,10 @@ public:
|
||||
}
|
||||
bool operator<(const VarTspSorter& other) const { return m_serial < other.m_serial; }
|
||||
const MTaskIdVec& mTaskIds() const { return m_mTaskIds; }
|
||||
int cost(const TspStateBase* otherp) const override {
|
||||
int cost(const TspStateBase* otherp) const override VL_MT_SAFE {
|
||||
return cost(static_cast<const VarTspSorter*>(otherp));
|
||||
}
|
||||
int cost(const VarTspSorter* otherp) const {
|
||||
int cost(const VarTspSorter* otherp) const VL_MT_SAFE {
|
||||
// Compute the number of MTasks not shared (Hamming distance)
|
||||
int cost = 0;
|
||||
const size_t size = ExecMTask::numUsedIds();
|
||||
@ -127,22 +128,20 @@ public:
|
||||
|
||||
uint32_t VarTspSorter::s_serialNext = 0;
|
||||
|
||||
struct VarAttributes final {
|
||||
uint8_t stratum; // Roughly equivalent to alignment requirement, to avoid padding
|
||||
bool anonOk; // Can be emitted as part of anonymous structure
|
||||
};
|
||||
class VariableOrder final {
|
||||
// NODE STATE
|
||||
// AstVar::user1() -> attributes, via m_attributes
|
||||
const VNUser1InUse m_user1InUse; // AstVar
|
||||
|
||||
struct VarAttributes final {
|
||||
uint32_t stratum; // Roughly equivalent to alignment requirement, to avoid padding
|
||||
bool anonOk; // Can be emitted as part of anonymous structure
|
||||
};
|
||||
|
||||
AstUser1Allocator<AstVar, VarAttributes> m_attributes; // Attributes used for sorting
|
||||
std::unordered_map<const AstVar*, VarAttributes> m_attributes;
|
||||
|
||||
const MTaskAffinityMap& m_mTaskAffinity;
|
||||
std::vector<AstVar*>& m_varps;
|
||||
|
||||
VariableOrder(AstNodeModule* modp, const MTaskAffinityMap& mTaskAffinity)
|
||||
: m_mTaskAffinity{mTaskAffinity} {
|
||||
VariableOrder(AstNodeModule* modp, const MTaskAffinityMap& mTaskAffinity,
|
||||
std::vector<AstVar*>& varps)
|
||||
: m_mTaskAffinity{mTaskAffinity}
|
||||
, m_varps{varps} {
|
||||
orderModuleVars(modp);
|
||||
}
|
||||
~VariableOrder() = default;
|
||||
@ -157,8 +156,11 @@ class VariableOrder final {
|
||||
if (ap->isStatic() != bp->isStatic()) { // Non-statics before statics
|
||||
return bp->isStatic();
|
||||
}
|
||||
const auto& attrA = m_attributes(ap);
|
||||
const auto& attrB = m_attributes(bp);
|
||||
UASSERT(m_attributes.find(ap) != m_attributes.end()
|
||||
&& m_attributes.find(bp) != m_attributes.end(),
|
||||
"m_attributes should be populated for each AstVar");
|
||||
const auto& attrA = m_attributes.at(ap);
|
||||
const auto& attrB = m_attributes.at(bp);
|
||||
if (attrA.anonOk != attrB.anonOk) { // Anons before non-anons
|
||||
return attrA.anonOk;
|
||||
}
|
||||
@ -209,58 +211,42 @@ class VariableOrder final {
|
||||
}
|
||||
|
||||
void orderModuleVars(AstNodeModule* modp) {
|
||||
std::vector<AstVar*> varps;
|
||||
|
||||
// Unlink all module variables from the module, compute attributes
|
||||
for (AstNode *nodep = modp->stmtsp(), *nextp; nodep; nodep = nextp) {
|
||||
nextp = nodep->nextp();
|
||||
if (AstVar* const varp = VN_CAST(nodep, Var)) {
|
||||
// Unlink, add to vector
|
||||
varp->unlinkFrBack();
|
||||
varps.push_back(varp);
|
||||
m_varps.push_back(varp);
|
||||
|
||||
// Compute attributes up front
|
||||
auto& attributes = m_attributes(varp);
|
||||
// Stratum
|
||||
const int sigbytes = varp->dtypeSkipRefp()->widthAlignBytes();
|
||||
attributes.stratum = (v3Global.opt.hierChild() && varp->isPrimaryIO()) ? 0
|
||||
: (varp->isUsedClock() && varp->widthMin() == 1) ? 1
|
||||
: VN_IS(varp->dtypeSkipRefp(), UnpackArrayDType) ? 9
|
||||
: (varp->basicp() && varp->basicp()->isOpaque()) ? 8
|
||||
: (varp->isScBv() || varp->isScBigUint()) ? 7
|
||||
: (sigbytes == 8) ? 6
|
||||
: (sigbytes == 4) ? 5
|
||||
: (sigbytes == 2) ? 3
|
||||
: (sigbytes == 1) ? 2
|
||||
: 10;
|
||||
// Anonymous structure ok
|
||||
attributes.anonOk = EmitCBase::isAnonOk(varp);
|
||||
const uint8_t stratum = (v3Global.opt.hierChild() && varp->isPrimaryIO()) ? 0
|
||||
: (varp->isUsedClock() && varp->widthMin() == 1) ? 1
|
||||
: VN_IS(varp->dtypeSkipRefp(), UnpackArrayDType) ? 9
|
||||
: (varp->basicp() && varp->basicp()->isOpaque()) ? 8
|
||||
: (varp->isScBv() || varp->isScBigUint()) ? 7
|
||||
: (sigbytes == 8) ? 6
|
||||
: (sigbytes == 4) ? 5
|
||||
: (sigbytes == 2) ? 3
|
||||
: (sigbytes == 1) ? 2
|
||||
: 10;
|
||||
m_attributes.emplace(varp, VarAttributes{stratum, EmitCBase::isAnonOk(varp)});
|
||||
}
|
||||
}
|
||||
|
||||
if (!varps.empty()) {
|
||||
// Sort variables
|
||||
if (!m_varps.empty()) {
|
||||
if (!v3Global.opt.mtasks()) {
|
||||
simpleSortVars(varps);
|
||||
simpleSortVars(m_varps);
|
||||
} else {
|
||||
tspSortVars(varps);
|
||||
tspSortVars(m_varps);
|
||||
}
|
||||
|
||||
// Insert them back under the module, in the new order, but at
|
||||
// the front of the list so they come out first in dumps/XML.
|
||||
auto it = varps.cbegin();
|
||||
AstVar* const firstp = *it++;
|
||||
for (; it != varps.cend(); ++it) firstp->addNext(*it);
|
||||
if (AstNode* const stmtsp = modp->stmtsp()) {
|
||||
stmtsp->unlinkFrBackWithNext();
|
||||
AstNode::addNext<AstNode, AstNode>(firstp, stmtsp);
|
||||
}
|
||||
modp->addStmtsp(firstp);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static void processModule(AstNodeModule* modp, const MTaskAffinityMap& mTaskAffinity) {
|
||||
VariableOrder{modp, mTaskAffinity};
|
||||
static void processModule(AstNodeModule* modp, const MTaskAffinityMap& mTaskAffinity,
|
||||
std::vector<AstVar*>& varps) VL_MT_STABLE {
|
||||
VariableOrder{modp, mTaskAffinity, varps};
|
||||
}
|
||||
};
|
||||
|
||||
@ -280,11 +266,44 @@ void V3VariableOrder::orderAll(AstNetlist* netlistp) {
|
||||
}
|
||||
});
|
||||
}
|
||||
if (v3Global.opt.stats()) V3Stats::statsStage("variableorder-gather");
|
||||
|
||||
// Order variables in each module
|
||||
// Sort variables for each module
|
||||
std::unordered_map<AstNodeModule*, std::vector<AstVar*>> sortedVars;
|
||||
{
|
||||
V3ThreadScope threadScope;
|
||||
|
||||
for (AstNodeModule* modp = v3Global.rootp()->modulesp(); modp;
|
||||
modp = VN_AS(modp->nextp(), NodeModule)) {
|
||||
std::vector<AstVar*>& varps = sortedVars[modp];
|
||||
threadScope.enqueue([modp, mTaskAffinity, &varps]() {
|
||||
VariableOrder::processModule(modp, mTaskAffinity, varps);
|
||||
});
|
||||
}
|
||||
}
|
||||
if (v3Global.opt.stats()) V3Stats::statsStage("variableorder-sort");
|
||||
|
||||
// Insert them back under the module, in the new order, but at
|
||||
// the front of the list so they come out first in dumps/XML.
|
||||
for (AstNodeModule* modp = v3Global.rootp()->modulesp(); modp;
|
||||
modp = VN_AS(modp->nextp(), NodeModule)) {
|
||||
VariableOrder::processModule(modp, mTaskAffinity);
|
||||
const std::vector<AstVar*>& varps = sortedVars[modp];
|
||||
|
||||
if (!varps.empty()) {
|
||||
auto it = varps.cbegin();
|
||||
AstVar* const firstp = *it++;
|
||||
firstp->unlinkFrBack();
|
||||
for (; it != varps.cend(); ++it) {
|
||||
AstVar* const varp = *it;
|
||||
varp->unlinkFrBack();
|
||||
firstp->addNext(varp);
|
||||
}
|
||||
if (AstNode* const stmtsp = modp->stmtsp()) {
|
||||
stmtsp->unlinkFrBackWithNext();
|
||||
AstNode::addNext<AstNode, AstNode>(firstp, stmtsp);
|
||||
}
|
||||
modp->addStmtsp(firstp);
|
||||
}
|
||||
}
|
||||
|
||||
// Done
|
||||
|
@ -26,7 +26,7 @@ class AstNetlist;
|
||||
|
||||
class V3VariableOrder final {
|
||||
public:
|
||||
static void orderAll(AstNetlist*) VL_MT_DISABLED;
|
||||
static void orderAll(AstNetlist*);
|
||||
};
|
||||
|
||||
#endif // Guard
|
||||
|
Loading…
Reference in New Issue
Block a user