forked from github/verilator
Add duplicate clock gate optimization, msg980.
Experimental and disabled unless -OD or -O3 is used (for now). Please try it, as it may give some significant speedups. Signed-off-by: Wilson Snyder <wsnyder@wsnyder.org>
This commit is contained in:
parent
f2fb77c15a
commit
e0edb596ea
4
Changes
4
Changes
@ -5,6 +5,10 @@ indicates the contributor was also the author of the fix; Thanks!
|
||||
|
||||
* Verilator 3.846-devel
|
||||
|
||||
*** Add duplicate clock gate optimization, msg980. [Varun Koyyalagunta]
|
||||
Disabled unless -OD or -O3 used, please try it as may get some
|
||||
significant speedups.
|
||||
|
||||
**** Support pattern assignment features, bug616, bug617, bug618. [Ed Lander]
|
||||
|
||||
**** Support bind in $unit, bug602. [Ed Lander]
|
||||
|
@ -1136,6 +1136,7 @@ public:
|
||||
virtual bool isPure() const { return true; } // Else a $display, etc, that must be ordered with other displays
|
||||
virtual bool isBrancher() const { return false; } // Changes control flow, disable some optimizations
|
||||
virtual bool isGateOptimizable() const { return true; } // Else a AstTime etc that can't be pushed out
|
||||
virtual bool isGateDedupable() const { return isGateOptimizable(); } // GateDedupable is a slightly larger superset of GateOptimzable (eg, AstNodeIf)
|
||||
virtual bool isSubstOptimizable() const { return true; } // Else a AstTime etc that can't be substituted out
|
||||
virtual bool isPredictOptimizable() const { return true; } // Else a AstTime etc which output can't be predicted from input
|
||||
virtual bool isOutputter() const { return false; } // Else creates output or exits, etc, not unconsumed
|
||||
@ -1389,6 +1390,7 @@ public:
|
||||
void addIfsp(AstNode* newp) { addOp2p(newp); }
|
||||
void addElsesp(AstNode* newp) { addOp3p(newp); }
|
||||
virtual bool isGateOptimizable() const { return false; }
|
||||
virtual bool isGateDedupable() const { return true; }
|
||||
virtual int instrCount() const { return instrCountBranch(); }
|
||||
virtual V3Hash sameHash() const { return V3Hash(); }
|
||||
virtual bool same(AstNode* samep) const { return true; }
|
||||
|
297
src/V3Gate.cpp
297
src/V3Gate.cpp
@ -41,6 +41,7 @@
|
||||
#include "V3Graph.h"
|
||||
#include "V3Const.h"
|
||||
#include "V3Stats.h"
|
||||
#include "V3Hashed.h"
|
||||
|
||||
typedef list<AstNodeVarRef*> GateVarRefList;
|
||||
|
||||
@ -55,21 +56,33 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
//######################################################################
|
||||
|
||||
class GateLogicVertex;
|
||||
class GateVarVertex;
|
||||
class GateGraphBaseVisitor {
|
||||
public:
|
||||
virtual AstNUser* visit(GateLogicVertex* vertexp, AstNUser* vup=NULL) =0;
|
||||
virtual AstNUser* visit(GateVarVertex* vertexp, AstNUser* vup=NULL) =0;
|
||||
};
|
||||
|
||||
//######################################################################
|
||||
// Support classes
|
||||
|
||||
class GateEitherVertex : public V3GraphVertex {
|
||||
AstScope* m_scopep;
|
||||
bool m_reducible; // True if this node should be able to be eliminated
|
||||
bool m_dedupable; // True if this node should be able to be deduped
|
||||
bool m_consumed; // Output goes to something meaningful
|
||||
public:
|
||||
GateEitherVertex(V3Graph* graphp, AstScope* scopep)
|
||||
: V3GraphVertex(graphp), m_scopep(scopep), m_reducible(true), m_consumed(false) {}
|
||||
: V3GraphVertex(graphp), m_scopep(scopep), m_reducible(true), m_dedupable(true), m_consumed(false) {}
|
||||
virtual ~GateEitherVertex() {}
|
||||
// ACCESSORS
|
||||
virtual string dotStyle() const { return m_consumed?"":"dotted"; }
|
||||
AstScope* scopep() const { return m_scopep; }
|
||||
bool reducible() const { return m_reducible; }
|
||||
bool dedupable() const { return m_dedupable; }
|
||||
void setConsumed(const char* consumedReason) {
|
||||
m_consumed = true;
|
||||
//UINFO(0,"\t\tSetConsumed "<<consumedReason<<" "<<this<<endl);
|
||||
@ -79,6 +92,23 @@ public:
|
||||
m_reducible = false;
|
||||
//UINFO(0," NR: "<<nonReducibleReason<<" "<<name()<<endl);
|
||||
}
|
||||
// Mark this vertex as not eligible for deduplication.
// nonDedupableReason is only consumed by the disabled debug print below.
void clearDedupable(const char* nonDedupableReason) {
    //UINFO(0,"     ND: "<<nonDedupableReason<<"  "<<name()<<endl);
    m_dedupable = false;
}
|
||||
// Clear both the reducible and the dedupable flag of this vertex,
// passing the same reason text to each of the two clear methods.
void clearReducibleAndDedupable(const char* nonReducibleReason) {
    clearReducible(nonReducibleReason);
    clearDedupable(nonReducibleReason);
}
|
||||
virtual AstNUser* accept(GateGraphBaseVisitor& v, AstNUser* vup=NULL) =0;
|
||||
// Returns only the result from the LAST vertex iterated over
|
||||
AstNUser* iterateInEdges(GateGraphBaseVisitor& v, AstNUser* vup=NULL) {
|
||||
AstNUser* retp;
|
||||
for (V3GraphEdge* edgep = inBeginp(); edgep; edgep = edgep->inNextp()) {
|
||||
retp = dynamic_cast<GateEitherVertex*>(edgep->fromp())->accept(v, vup);
|
||||
}
|
||||
return retp;
|
||||
}
|
||||
};
|
||||
|
||||
class GateVarVertex : public GateEitherVertex {
|
||||
@ -113,6 +143,7 @@ public:
|
||||
setIsClock();
|
||||
}
|
||||
}
|
||||
AstNUser* accept(GateGraphBaseVisitor& v, AstNUser* vup=NULL) { return v.visit(this,vup); }
|
||||
};
|
||||
|
||||
class GateLogicVertex : public GateEitherVertex {
|
||||
@ -129,6 +160,7 @@ public:
|
||||
AstNode* nodep() const { return m_nodep; }
|
||||
AstActive* activep() const { return m_activep; }
|
||||
bool slow() const { return m_slow; }
|
||||
AstNUser* accept(GateGraphBaseVisitor& v, AstNUser* vup=NULL) { return v.visit(this,vup); }
|
||||
};
|
||||
|
||||
//######################################################################
|
||||
@ -143,6 +175,7 @@ private:
|
||||
// STATE
|
||||
bool m_buffersOnly; // Set when we only allow simple buffering, no equations (for clocks)
|
||||
AstNodeVarRef* m_lhsVarRef; // VarRef on lhs of assignment (what we're replacing)
|
||||
bool m_dedupe; // Set when we use isGateDedupable instead of isGateOptimizable
|
||||
|
||||
// METHODS
|
||||
void clearSimple(const char* because) {
|
||||
@ -202,7 +235,7 @@ private:
|
||||
virtual void visit(AstNode* nodep, AstNUser*) {
|
||||
// *** Special iterator
|
||||
if (!m_isSimple) return; // Fastpath
|
||||
if (!nodep->isGateOptimizable()
|
||||
if (!(m_dedupe ? nodep->isGateDedupable() : nodep->isGateOptimizable())
|
||||
|| !nodep->isPure()
|
||||
|| nodep->isBrancher()) {
|
||||
UINFO(5, "Non optimizable type: "<<nodep<<endl);
|
||||
@ -212,11 +245,12 @@ private:
|
||||
}
|
||||
public:
|
||||
// CONSTRUCTORS
|
||||
GateOkVisitor(AstNode* nodep, bool buffersOnly) {
|
||||
GateOkVisitor(AstNode* nodep, bool buffersOnly, bool dedupe) {
|
||||
m_isSimple = true;
|
||||
m_substTreep = NULL;
|
||||
m_buffersOnly = buffersOnly;
|
||||
m_lhsVarRef = NULL;
|
||||
m_dedupe = dedupe;
|
||||
// Iterate
|
||||
nodep->accept(*this);
|
||||
// Check results
|
||||
@ -267,6 +301,7 @@ private:
|
||||
bool m_inSlow; // Inside a slow structure
|
||||
V3Double0 m_statSigs; // Statistic tracking
|
||||
V3Double0 m_statRefs; // Statistic tracking
|
||||
V3Double0 m_statDedupLogic; // Statistic tracking
|
||||
|
||||
// METHODS
|
||||
void iterateNewStmt(AstNode* nodep, const char* nonReducibleReason, const char* consumeReason) {
|
||||
@ -274,9 +309,10 @@ private:
|
||||
UINFO(4," STMT "<<nodep<<endl);
|
||||
// m_activep is null under AstCFunc's, that's ok.
|
||||
m_logicVertexp = new GateLogicVertex(&m_graph, m_scopep, nodep, m_activep, m_inSlow);
|
||||
if (!m_activeReducible) nonReducibleReason="Block Unreducible";
|
||||
if (nonReducibleReason) {
|
||||
m_logicVertexp->clearReducible(nonReducibleReason);
|
||||
m_logicVertexp->clearReducibleAndDedupable(nonReducibleReason);
|
||||
} else if (!m_activeReducible) {
|
||||
m_logicVertexp->clearReducible("Block Unreducible"); // Sequential logic is dedupable
|
||||
}
|
||||
if (consumeReason) m_logicVertexp->setConsumed(consumeReason);
|
||||
if (nodep->castSenItem()) m_logicVertexp->setConsumed("senItem");
|
||||
@ -293,13 +329,13 @@ private:
|
||||
varscp->user1p(vertexp);
|
||||
if (varscp->varp()->isSigPublic()) {
|
||||
// Public signals shouldn't be changed, pli code might be messing with them
|
||||
vertexp->clearReducible("SigPublic");
|
||||
vertexp->clearReducibleAndDedupable("SigPublic");
|
||||
vertexp->setConsumed("SigPublic");
|
||||
}
|
||||
if (varscp->varp()->isIO() && varscp->scopep()->isTop()) {
|
||||
// We may need to convert to/from sysc/reg sigs
|
||||
vertexp->setIsTop();
|
||||
vertexp->clearReducible("isTop");
|
||||
vertexp->clearReducibleAndDedupable("isTop");
|
||||
vertexp->setConsumed("isTop");
|
||||
}
|
||||
if (varscp->varp()->isUsedClock()) vertexp->setConsumed("clock");
|
||||
@ -314,6 +350,7 @@ private:
|
||||
void consumedMarkRecurse(GateEitherVertex* vertexp);
|
||||
void consumedMove();
|
||||
void replaceAssigns();
|
||||
void dedupe();
|
||||
|
||||
// VISITORS
|
||||
virtual void visit(AstNetlist* nodep, AstNUser*) {
|
||||
@ -328,6 +365,8 @@ private:
|
||||
optimizeSignals(false);
|
||||
// Then propagate more complicated equations
|
||||
optimizeSignals(true);
|
||||
// Remove redundant logic
|
||||
if (v3Global.opt.oDedupe()) dedupe();
|
||||
// Warn
|
||||
warnSignals();
|
||||
consumedMark();
|
||||
@ -452,6 +491,7 @@ private:
|
||||
public:
|
||||
// CONSTRUCTORS
|
||||
GateVisitor(AstNode* nodep) {
|
||||
AstNode::user1ClearTree();
|
||||
m_logicVertexp = NULL;
|
||||
m_scopep = NULL;
|
||||
m_modp = NULL;
|
||||
@ -464,6 +504,7 @@ public:
|
||||
virtual ~GateVisitor() {
|
||||
V3Stats::addStat("Optimizations, Gate sigs deleted", m_statSigs);
|
||||
V3Stats::addStat("Optimizations, Gate inputs replaced", m_statRefs);
|
||||
V3Stats::addStat("Optimizations, Gate sigs deduped", m_statDedupLogic);
|
||||
}
|
||||
};
|
||||
|
||||
@ -473,7 +514,7 @@ void GateVisitor::optimizeSignals(bool allowMultiIn) {
|
||||
for (V3GraphVertex* itp = m_graph.verticesBeginp(); itp; itp=itp->verticesNextp()) {
|
||||
if (GateVarVertex* vvertexp = dynamic_cast<GateVarVertex*>(itp)) {
|
||||
if (vvertexp->inEmpty()) {
|
||||
vvertexp->clearReducible("inEmpty"); // Can't deal with no sources
|
||||
vvertexp->clearReducibleAndDedupable("inEmpty"); // Can't deal with no sources
|
||||
if (!vvertexp->isTop() // Ok if top inputs are driverless
|
||||
&& !vvertexp->varScp()->varp()->valuep()
|
||||
&& !vvertexp->varScp()->varp()->isSigPublic()) {
|
||||
@ -489,7 +530,7 @@ void GateVisitor::optimizeSignals(bool allowMultiIn) {
|
||||
}
|
||||
}
|
||||
else if (!vvertexp->inSize1()) {
|
||||
vvertexp->clearReducible("size!1"); // Can't deal with more than one src
|
||||
vvertexp->clearReducibleAndDedupable("size!1"); // Can't deal with more than one src
|
||||
}
|
||||
// Reduce it?
|
||||
if (!vvertexp->reducible()) {
|
||||
@ -502,7 +543,7 @@ void GateVisitor::optimizeSignals(bool allowMultiIn) {
|
||||
AstNode* logicp = logicVertexp->nodep();
|
||||
if (logicVertexp->reducible()) {
|
||||
// Can we eliminate?
|
||||
GateOkVisitor okVisitor(logicp, vvertexp->isClock());
|
||||
GateOkVisitor okVisitor(logicp, vvertexp->isClock(), false);
|
||||
bool multiInputs = okVisitor.rhsVarRefs().size() > 1;
|
||||
// Was it ok?
|
||||
bool doit = okVisitor.isSimple();
|
||||
@ -730,6 +771,8 @@ private:
|
||||
// However a VARREF should point to the original as it's otherwise confusing
|
||||
// to throw warnings that point to a PIN rather than where the pin is used.
|
||||
if (substp->castVarRef()) substp->fileline(nodep->fileline());
|
||||
// Make the substp an rvalue like nodep. This facilitates the hashing in dedupe.
|
||||
if (AstNodeVarRef* varrefp = substp->castNodeVarRef()) varrefp->lvalue(false);
|
||||
nodep->replaceWith(substp);
|
||||
nodep->deleteTree(); nodep=NULL;
|
||||
}
|
||||
@ -761,6 +804,240 @@ void GateVisitor::optimizeElimVar(AstVarScope* varscp, AstNode* substp, AstNode*
|
||||
}
|
||||
}
|
||||
|
||||
//######################################################################
|
||||
// Auxiliary hash class for GateDedupeVarVisitor
|
||||
|
||||
class GateDedupeHash : public V3HashedUserCheck {
|
||||
private:
|
||||
// NODE STATE
|
||||
// Ast*::user2p -> parent AstNodeAssign* for this rhsp
|
||||
// Ast*::user3p -> AstNode* checked in test for duplicate
|
||||
// Ast*::user5p -> AstNode* checked in test for duplicate
|
||||
// AstUser2InUse m_inuser2; (Allocated for use in GateVisitor)
|
||||
AstUser3InUse m_inuser3;
|
||||
AstUser5InUse m_inuser5;
|
||||
V3Hashed m_hashed; // Hash, contains rhs of assigns
|
||||
|
||||
void hash(AstNode* nodep) {
|
||||
// !NULL && the object is hashable
|
||||
if (nodep && !nodep->sameHash().isIllegal()) {
|
||||
m_hashed.hash(nodep);
|
||||
}
|
||||
}
|
||||
bool sameHash(AstNode* node1p, AstNode* node2p) {
|
||||
return (node1p && node2p
|
||||
&& !node1p->sameHash().isIllegal()
|
||||
&& !node2p->sameHash().isIllegal()
|
||||
&& m_hashed.sameNodes(node1p,node2p));
|
||||
}
|
||||
bool same(AstNUser* node1p, AstNUser* node2p) {
|
||||
return node1p == node2p || sameHash((AstNode*)node1p,(AstNode*)node2p);
|
||||
}
|
||||
public:
|
||||
bool check(AstNode* node1p,AstNode* node2p) {
|
||||
return same(node1p->user3p(),node2p->user3p()) && same(node1p->user5p(),node2p->user5p())
|
||||
&& node1p->user2p()->castNode()->type() == node2p->user2p()->castNode()->type()
|
||||
;
|
||||
}
|
||||
|
||||
AstNodeAssign* hashAndFindDupe(AstNodeAssign* assignp, AstNode* extra1p, AstNode* extra2p) {
|
||||
AstNode *rhsp = assignp->rhsp();
|
||||
rhsp->user2p(assignp);
|
||||
rhsp->user3p(extra1p);
|
||||
rhsp->user5p(extra2p);
|
||||
|
||||
hash(extra1p);
|
||||
hash(extra2p);
|
||||
|
||||
V3Hashed::iterator inserted = m_hashed.hashAndInsert(rhsp);
|
||||
V3Hashed::iterator dupit = m_hashed.findDuplicate(rhsp, this);
|
||||
// Even though rhsp was just inserted, V3Hashed::findDuplicate doesn't
|
||||
// return anything in the hash that has the same pointer (V3Hashed.cpp::findDuplicate)
|
||||
// So dupit is either a different, duplicate rhsp, or the end of the hash.
|
||||
if (dupit != m_hashed.end()) {
|
||||
m_hashed.erase(inserted);
|
||||
return m_hashed.iteratorNodep(dupit)->user2p()->castNode()->castNodeAssign();
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
};
|
||||
|
||||
//######################################################################
|
||||
// Have we seen the rhs of this assign before?
|
||||
|
||||
class GateDedupeVarVisitor : public GateBaseVisitor {
|
||||
// A node passed to findDupe() is visited in this order
|
||||
// (otherwise dupe not found)
|
||||
// 1. AstNodeAssign
|
||||
// 2. AstAlways -> AstNodeAssign
|
||||
// 3. AstAlways -> AstNodeIf -> AstNodeAssign
|
||||
private:
|
||||
// RETURN STATE
|
||||
AstNodeVarRef* m_dupLhsVarRefp; // Duplicate lhs varref that was found
|
||||
// STATE
|
||||
GateDedupeHash m_hash; // Hash used to find dupes
|
||||
AstVarScope* m_consumerVarScopep; // VarScope on lhs of assignment (what we're replacing)
|
||||
AstActive* m_activep; // AstActive that assign is under
|
||||
AstNode* m_ifCondp; // IF condition that assign is under
|
||||
bool m_always; // Assign is under an always
|
||||
|
||||
// VISITORS
|
||||
virtual void visit(AstNodeAssign* assignp, AstNUser*) {
|
||||
AstNode* lhsp = assignp->lhsp();
|
||||
// Possible todo, handle more complex lhs expressions
|
||||
if (AstNodeVarRef* lhsVarRefp = lhsp->castNodeVarRef()) {
|
||||
if (lhsVarRefp->varScopep() != m_consumerVarScopep) m_consumerVarScopep->v3fatalSrc("Consumer doesn't match lhs of assign");
|
||||
if (AstNodeAssign* dup = m_hash.hashAndFindDupe(assignp,m_activep,m_ifCondp)) {
|
||||
m_dupLhsVarRefp = dup->lhsp()->castNodeVarRef();
|
||||
}
|
||||
}
|
||||
}
|
||||
virtual void visit(AstAlways* alwaysp, AstNUser*) {
|
||||
// I think we could safely dedupe an always block with multiple non-blocking statements,
|
||||
// but erring on side of caution here
|
||||
if (!m_always && alwaysp->isJustOneBodyStmt()) {
|
||||
m_always = true;
|
||||
alwaysp->bodysp()->accept(*this);
|
||||
}
|
||||
}
|
||||
// Ugly support for latches of the specific form -
|
||||
// always @(...)
|
||||
// if (...)
|
||||
// foo = ...; // or foo <= ...;
|
||||
virtual void visit(AstNodeIf* ifp, AstNUser*) {
|
||||
if (m_always && !ifp->elsesp()) { //we're under an always and there's no else
|
||||
AstNode* ifsp = ifp->ifsp();
|
||||
if (!ifsp->nextp()) { //only one stmt under if
|
||||
m_ifCondp = ifp->condp();
|
||||
ifsp->accept(*this);
|
||||
}
|
||||
}
|
||||
}
|
||||
//--------------------
|
||||
// Default
|
||||
virtual void visit(AstNode* nodep, AstNUser*) {}
|
||||
|
||||
public:
|
||||
// CONSTUCTORS
|
||||
GateDedupeVarVisitor() {}
|
||||
// PUBLIC METHODS
|
||||
AstNodeVarRef* findDupe(AstNode* nodep, AstVarScope* consumerVarScopep, AstActive* activep) {
|
||||
m_consumerVarScopep = consumerVarScopep;
|
||||
m_activep = activep;
|
||||
m_always = false;
|
||||
m_ifCondp = NULL;
|
||||
m_dupLhsVarRefp = NULL;
|
||||
nodep->accept(*this);
|
||||
return m_dupLhsVarRefp;
|
||||
}
|
||||
};
|
||||
|
||||
//######################################################################
|
||||
// Recurse through the graph, looking for duplicate expressions on the rhs of an assign
|
||||
|
||||
class GateDedupeGraphVisitor : public GateGraphBaseVisitor {
|
||||
private:
|
||||
// NODE STATE
|
||||
// AstVarScope::user2p -> bool: already visited
|
||||
// AstUser2InUse m_inuser2; (Allocated for use in GateVisitor)
|
||||
V3Double0 m_numDeduped; // Statistic tracking
|
||||
GateDedupeVarVisitor m_varVisitor; // Looks for a dupe of the logic
|
||||
|
||||
virtual AstNUser* visit(GateVarVertex *vvertexp, AstNUser*) {
|
||||
// Check that we haven't been here before
|
||||
if (vvertexp->varScp()->user2()) return NULL;
|
||||
vvertexp->varScp()->user2(true);
|
||||
|
||||
AstNodeVarRef* dupVarRefp = (AstNodeVarRef*) vvertexp->iterateInEdges(*this, (AstNUser*) vvertexp);
|
||||
|
||||
if (dupVarRefp && vvertexp->inSize1()) {
|
||||
V3GraphEdge* edgep = vvertexp->inBeginp();
|
||||
GateLogicVertex* lvertexp = (GateLogicVertex*)edgep->fromp();
|
||||
if (!vvertexp->dedupable()) vvertexp->varScp()->v3fatalSrc("GateLogicVertex* visit should have returned NULL if consumer var vertex is not dedupable.");
|
||||
GateOkVisitor okVisitor(lvertexp->nodep(), false, true);
|
||||
if (okVisitor.isSimple()) {
|
||||
AstVarScope* dupVarScopep = dupVarRefp->varScopep();
|
||||
GateVarVertex* dupVvertexp = (GateVarVertex*) (dupVarScopep->user1p());
|
||||
UINFO(4,"replacing " << vvertexp << " with " << dupVvertexp << endl);
|
||||
m_numDeduped++;
|
||||
// Replace all of this varvertex's consumers with dupVarRefp
|
||||
for (V3GraphEdge* outedgep = vvertexp->outBeginp();outedgep;) {
|
||||
GateLogicVertex* consumeVertexp = dynamic_cast<GateLogicVertex*>(outedgep->top());
|
||||
AstNode* consumerp = consumeVertexp->nodep();
|
||||
GateElimVisitor elimVisitor(consumerp,vvertexp->varScp(),dupVarRefp);
|
||||
outedgep = outedgep->relinkFromp(dupVvertexp);
|
||||
}
|
||||
|
||||
// Propogate attributes
|
||||
dupVvertexp->propagateAttrClocksFrom(vvertexp);
|
||||
|
||||
// Remove inputs links
|
||||
while (V3GraphEdge* inedgep = vvertexp->inBeginp()) {
|
||||
inedgep->unlinkDelete(); inedgep=NULL;
|
||||
}
|
||||
// replaceAssigns() does the deleteTree on lvertexNodep in a later step
|
||||
AstNode* lvertexNodep = lvertexp->nodep();
|
||||
lvertexNodep->unlinkFrBack();
|
||||
vvertexp->varScp()->valuep(lvertexNodep);
|
||||
lvertexNodep = NULL;
|
||||
vvertexp->user(true);
|
||||
lvertexp->user(true);
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Returns a varref that has the same logic input
|
||||
virtual AstNUser* visit(GateLogicVertex* lvertexp, AstNUser* vup) {
|
||||
lvertexp->iterateInEdges(*this);
|
||||
|
||||
GateVarVertex* consumerVvertexpp = (GateVarVertex*) vup;
|
||||
if (lvertexp->dedupable() && consumerVvertexpp->dedupable()) {
|
||||
AstNode* nodep = lvertexp->nodep();
|
||||
AstVarScope* consumerVarScopep = consumerVvertexpp->varScp();
|
||||
// TODO: Doing a simple pointer comparison of activep won't work
|
||||
// optimally for statements under generated clocks. Statements under
|
||||
// different generated clocks will never compare as equal, even if the
|
||||
// generated clocks are deduped into one clock.
|
||||
AstActive* activep = lvertexp->activep();
|
||||
return (AstNUser*) m_varVisitor.findDupe(nodep, consumerVarScopep, activep);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
public:
|
||||
GateDedupeGraphVisitor() {}
|
||||
void dedupeTree(GateVarVertex* vvertexp) {
|
||||
vvertexp->accept(*this);
|
||||
}
|
||||
V3Double0 numDeduped() { return m_numDeduped; }
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
void GateVisitor::dedupe() {
|
||||
AstNode::user2ClearTree();
|
||||
GateDedupeGraphVisitor deduper;
|
||||
// Traverse starting from each of the clocks
|
||||
for (V3GraphVertex* itp = m_graph.verticesBeginp(); itp; itp=itp->verticesNextp()) {
|
||||
if (GateVarVertex* vvertexp = dynamic_cast<GateVarVertex*>(itp)) {
|
||||
if (vvertexp->isClock()) {
|
||||
deduper.dedupeTree(vvertexp);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Traverse starting from each of the outputs
|
||||
for (V3GraphVertex* itp = m_graph.verticesBeginp(); itp; itp=itp->verticesNextp()) {
|
||||
if (GateVarVertex* vvertexp = dynamic_cast<GateVarVertex*>(itp)) {
|
||||
if (vvertexp->isTop() && vvertexp->varScp()->varp()->isOutput()) {
|
||||
deduper.dedupeTree(vvertexp);
|
||||
}
|
||||
}
|
||||
}
|
||||
m_statDedupLogic += deduper.numDeduped();
|
||||
}
|
||||
|
||||
//######################################################################
|
||||
// Convert VARSCOPE(ASSIGN(default, VARREF)) to just VARSCOPE(default)
|
||||
|
||||
|
@ -765,6 +765,7 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, char
|
||||
case 'a': m_oTable = flag; break;
|
||||
case 'b': m_oCombine = flag; break;
|
||||
case 'c': m_oConst = flag; break;
|
||||
case 'd': m_oDedupe = flag; break;
|
||||
case 'e': m_oCase = flag; break;
|
||||
case 'f': m_oFlopGater = flag; break;
|
||||
case 'g': m_oGate = flag; break;
|
||||
@ -1301,6 +1302,7 @@ void V3Options::optimize(int level) {
|
||||
m_oSubst = flag;
|
||||
m_oSubstConst = flag;
|
||||
m_oTable = flag;
|
||||
m_oDedupe = level >= 3;
|
||||
// And set specific optimization levels
|
||||
if (level >= 3) {
|
||||
m_inlineMult = -1; // Maximum inlining
|
||||
|
@ -131,6 +131,7 @@ class V3Options {
|
||||
bool m_oCase; // main switch: -Oe: case tree conversion
|
||||
bool m_oCombine; // main switch: -Ob: common icode packing
|
||||
bool m_oConst; // main switch: -Oc: constant folding
|
||||
bool m_oDedupe; // main switch: -Od: logic deduplication
|
||||
bool m_oExpand; // main switch: -Ox: expansion of C macros
|
||||
bool m_oFlopGater; // main switch: -Of: flop gater detection
|
||||
bool m_oGate; // main switch: -Og: gate wire elimination
|
||||
@ -266,6 +267,7 @@ class V3Options {
|
||||
bool oCase() const { return m_oCase; }
|
||||
bool oCombine() const { return m_oCombine; }
|
||||
bool oConst() const { return m_oConst; }
|
||||
bool oDedupe() const { return m_oDedupe; }
|
||||
bool oExpand() const { return m_oExpand; }
|
||||
bool oFlopGater() const { return m_oFlopGater; }
|
||||
bool oGate() const { return m_oGate; }
|
||||
|
@ -365,6 +365,7 @@ sub new {
|
||||
'v3' => 0,
|
||||
verilator_flags => ["-cc",
|
||||
"-Mdir $self->{obj_dir}",
|
||||
"-OD", # As currently disabled unless -O3
|
||||
"--debug-check"],
|
||||
verilator_flags2 => [],
|
||||
verilator_make_gcc => 1,
|
||||
|
@ -17,7 +17,6 @@ compile (
|
||||
%Warning-CDCRSTLOGIC: See details in obj_dir/t_cdc_async_bad/Vt_cdc_async_bad__cdc.txt
|
||||
%Warning-CDCRSTLOGIC: t/t_cdc_async_bad.v:\d+: Logic in path that feeds async reset, via signal: v.rst6a_bad_n
|
||||
%Warning-CDCRSTLOGIC: t/t_cdc_async_bad.v:\d+: Logic in path that feeds async reset, via signal: v.rst6b_bad_n
|
||||
%Warning-CDCRSTLOGIC: t/t_cdc_async_bad.v:\d+: Logic in path that feeds async reset, via signal: v.rst3_bad_n
|
||||
%Error: Exiting due to.*',
|
||||
);
|
||||
|
||||
|
19
test_regress/t/t_dedupe_clk_gate.pl
Executable file
19
test_regress/t/t_dedupe_clk_gate.pl
Executable file
@ -0,0 +1,19 @@
|
||||
#!/usr/bin/perl
|
||||
if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# Copyright 2003 by Wilson Snyder. This program is free software; you can
|
||||
# redistribute it and/or modify it under the terms of either the GNU
|
||||
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||
# Version 2.0.
|
||||
|
||||
compile (
|
||||
verilator_flags2 => ["--stats"],
|
||||
);
|
||||
|
||||
if ($Self->{vlt}) {
|
||||
file_grep ($Self->{stats}, qr/Optimizations, Gate sigs deduped\s+(\d+)/i, 4);
|
||||
}
|
||||
|
||||
ok(1);
|
||||
1;
|
61
test_regress/t/t_dedupe_clk_gate.v
Normal file
61
test_regress/t/t_dedupe_clk_gate.v
Normal file
@ -0,0 +1,61 @@
|
||||
// DESCRIPTION: Verilator: Dedupe optimization test.
|
||||
//
|
||||
// This file ONLY is placed into the Public Domain, for any use,
|
||||
// without warranty.
|
||||
|
||||
// Contributed 2012 by Varun Koyyalagunta, Centaur Technology.
|
||||
|
||||
module t(res,d,clk,en);
|
||||
output res;
|
||||
input d,en,clk;
|
||||
wire q0,q1,q2,q3;
|
||||
|
||||
flop_gated_latch f0(q0,d,clk,en);
|
||||
flop_gated_latch f1(q1,d,clk,en);
|
||||
flop_gated_flop f2(q2,d,clk,en);
|
||||
flop_gated_flop f3(q3,d,clk,en);
|
||||
assign res = (q0 + q1) * (q2 - q3);
|
||||
endmodule
|
||||
|
||||
module flop_gated_latch(q,d,clk,en);
|
||||
input d, clk, en;
|
||||
output q;
|
||||
wire gated_clock;
|
||||
clock_gate_latch clock_gate(gated_clock, clk, en);
|
||||
always @(posedge gated_clock) begin
|
||||
q <= d;
|
||||
end
|
||||
endmodule
|
||||
|
||||
module flop_gated_flop(q,d,clk,en);
|
||||
input d, clk, en;
|
||||
output q;
|
||||
wire gated_clock;
|
||||
clock_gate_flop clock_gate(gated_clock, clk, en);
|
||||
always @(posedge gated_clock) begin
|
||||
q <= d;
|
||||
end
|
||||
endmodule
|
||||
|
||||
module clock_gate_latch (gated_clk, clk, clken);
|
||||
output gated_clk;
|
||||
input clk, clken;
|
||||
reg clken_latched /*verilator clock_enable*/;
|
||||
assign gated_clk = clk & clken_latched ;
|
||||
|
||||
wire clkb = ~clk;
|
||||
always @(clkb or clken)
|
||||
if(clkb) clken_latched = clken;
|
||||
|
||||
endmodule
|
||||
|
||||
module clock_gate_flop (gated_clk, clk, clken);
|
||||
output gated_clk;
|
||||
input clk, clken;
|
||||
reg clken_r /*verilator clock_enable*/;
|
||||
assign gated_clk = clk & clken_r ;
|
||||
|
||||
always @(negedge clk)
|
||||
clken_r <= clken;
|
||||
|
||||
endmodule
|
19
test_regress/t/t_dedupe_seq_logic.pl
Executable file
19
test_regress/t/t_dedupe_seq_logic.pl
Executable file
@ -0,0 +1,19 @@
|
||||
#!/usr/bin/perl
|
||||
if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# Copyright 2003 by Wilson Snyder. This program is free software; you can
|
||||
# redistribute it and/or modify it under the terms of either the GNU
|
||||
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||
# Version 2.0.
|
||||
|
||||
compile (
|
||||
verilator_flags2 => ["--stats"],
|
||||
);
|
||||
|
||||
if ($Self->{vlt}) {
|
||||
file_grep ($Self->{stats}, qr/Optimizations, Gate sigs deduped\s+(\d+)/i, 6);
|
||||
}
|
||||
|
||||
ok(1);
|
||||
1;
|
123
test_regress/t/t_dedupe_seq_logic.v
Normal file
123
test_regress/t/t_dedupe_seq_logic.v
Normal file
@ -0,0 +1,123 @@
|
||||
// DESCRIPTION: Verilator: Dedupe optimization test.
|
||||
//
|
||||
// This file ONLY is placed into the Public Domain, for any use,
|
||||
// without warranty.
|
||||
|
||||
// Contributed 2012 by Varun Koyyalagunta, Centaur Technology.
|
||||
//
|
||||
// Test consists of the following logic tree, which has many obvious
|
||||
// places for dedupe:
|
||||
/*
|
||||
output
|
||||
+
|
||||
--------------/ \--------------
|
||||
/ \
|
||||
+ +
|
||||
----/ \----- ----/ \----
|
||||
/ + / +
|
||||
+ / \ + / \
|
||||
-/ \- a b -/ \- a b
|
||||
/ \ / \
|
||||
+ + + +
|
||||
/ \ / \ / \ / \
|
||||
a b c d a b c d
|
||||
*/
|
||||
|
||||
module t(sum,a,b,c,d,clk);
|
||||
output sum;
|
||||
input a,b,c,d,clk;
|
||||
wire left,right;
|
||||
add add(sum,left,right,clk);
|
||||
l l(left,a,b,c,d,clk);
|
||||
r r(right,a,b,c,d,clk);
|
||||
endmodule
|
||||
|
||||
module l(sum,a,b,c,d,clk);
|
||||
output sum;
|
||||
input a,b,c,d,clk;
|
||||
wire left, right;
|
||||
add add(sum,left,right,clk);
|
||||
ll ll(left,a,b,c,d,clk);
|
||||
lr lr(right,a,b,c,d,clk);
|
||||
endmodule
|
||||
|
||||
module ll(sum,a,b,c,d,clk);
|
||||
output sum;
|
||||
input a,b,c,d,clk;
|
||||
wire left, right;
|
||||
add add(sum,left,right,clk);
|
||||
lll lll(left,a,b,c,d,clk);
|
||||
llr llr(right,a,b,c,d,clk);
|
||||
endmodule
|
||||
|
||||
module lll(sum,a,b,c,d,clk);
|
||||
output sum;
|
||||
input a,b,c,d,clk;
|
||||
add add(sum,a,b,clk);
|
||||
endmodule
|
||||
|
||||
module llr(sum,a,b,c,d,clk);
|
||||
output sum;
|
||||
input a,b,c,d,clk;
|
||||
add add(sum,c,d,clk);
|
||||
endmodule
|
||||
|
||||
module lr(sum,a,b,c,d,clk);
|
||||
output sum;
|
||||
input a,b,c,d,clk;
|
||||
add add(sum,a,b,clk);
|
||||
endmodule
|
||||
|
||||
module r(sum,a,b,c,d,clk);
|
||||
output sum;
|
||||
input a,b,c,d,clk;
|
||||
wire left, right;
|
||||
add add(sum,left,right,clk);
|
||||
rl rl(left,a,b,c,d,clk);
|
||||
rr rr(right,a,b,c,d,clk);
|
||||
endmodule
|
||||
|
||||
module rr(sum,a,b,c,d,clk);
|
||||
output sum;
|
||||
input a,b,c,d,clk;
|
||||
add add(sum,a,b,clk);
|
||||
endmodule
|
||||
|
||||
module rl(sum,a,b,c,d,clk);
|
||||
output sum;
|
||||
input a,b,c,d,clk;
|
||||
wire left, right;
|
||||
add add(sum,left,right,clk);
|
||||
rll rll(left,a,b,c,d,clk);
|
||||
rlr rlr(right,a,b,c,d,clk);
|
||||
endmodule
|
||||
|
||||
module rll(sum,a,b,c,d,clk);
|
||||
output sum;
|
||||
input a,b,c,d,clk;
|
||||
add2 add(sum,a,b,clk);
|
||||
endmodule
|
||||
|
||||
module rlr(sum,a,b,c,d,clk);
|
||||
output sum;
|
||||
input a,b,c,d,clk;
|
||||
add2 add(sum,c,d,clk);
|
||||
endmodule
|
||||
|
||||
module add(sum,x,y,clk);
|
||||
output sum;
|
||||
input x,y,clk;
|
||||
reg t1,t2;
|
||||
always @(posedge clk) begin
|
||||
sum <= x + y;
|
||||
end
|
||||
endmodule
|
||||
|
||||
module add2(sum,x,y,clk);
|
||||
output sum;
|
||||
input x,y,clk;
|
||||
reg t1,t2;
|
||||
always @(posedge clk) begin
|
||||
sum <= x + y;
|
||||
end
|
||||
endmodule
|
Loading…
Reference in New Issue
Block a user