mirror of
https://github.com/verilator/verilator.git
synced 2025-04-06 04:32:39 +00:00
Add --output-split-cfuncs for accelerating GCC.
git-svn-id: file://localhost/svn/verilator/trunk/verilator@829 77ca24e4-aefa-0310-84f0-b9a241c72d87
This commit is contained in:
parent
9026118a7c
commit
86c8e7b3e6
2
Changes
2
Changes
@ -5,6 +5,8 @@ indicates the contributor was also the author of the fix; Thanks!
|
||||
|
||||
* Verilator 3.6**
|
||||
|
||||
*** Add --output-split-cfuncs for accelerating GCC. [Eugene Weber]
|
||||
|
||||
**** Fix $signed mis-extending when input has a WIDTH violation. [Eugene Weber]
|
||||
|
||||
* Verilator 3.622 10/17/2006 Stable
|
||||
|
@ -377,9 +377,18 @@ C++ file exceeds the specified number of operations, a new file will be
|
||||
created. In addition, any slow routines will be placed into __Slow files.
|
||||
This accelerates compilation, as optimization can be disabled on the slow
|
||||
routines, and the remaining files can be compiled on parallel machines.
|
||||
Using --output-split should have only a trivial impact on performance.
|
||||
With GCC 3.3 on a 2GHz Opteron, --output-split 20000 will result in
|
||||
splitting into approximately one-minute-compile chunks.
|
||||
|
||||
=item --output-split-cfuncs I<statements>
|
||||
|
||||
Enables splitting functions in the output .cpp/.sp files into multiple
|
||||
functions. When a generated function exceeds the specified number of
|
||||
operations, a new function will be created. With --output-split, this will
|
||||
enable GCC to compile faster, at a small loss in performance that increases
|
||||
with smaller statement values.
|
||||
|
||||
=item --pins64
|
||||
|
||||
Specifies SystemC outputs of 33-64 bits wide should use uint64_t instead of
|
||||
|
@ -348,6 +348,10 @@ void V3Options::parseOptsList(FileLine* fl, int argc, char** argv) {
|
||||
shift;
|
||||
m_outputSplit = atoi(argv[i]);
|
||||
}
|
||||
else if ( !strcmp (sw, "-output-split-cfuncs") ) {
|
||||
shift;
|
||||
m_outputSplitCFuncs = atoi(argv[i]);
|
||||
}
|
||||
else if ( !strcmp (sw, "-unroll-count") ) { // Undocumented optimization tweak
|
||||
shift;
|
||||
m_unrollCount = atoi(argv[i]);
|
||||
@ -581,6 +585,7 @@ V3Options::V3Options() {
|
||||
|
||||
m_inlineMult = 2000;
|
||||
m_outputSplit = 0;
|
||||
m_outputSplitCFuncs = 0;
|
||||
m_unrollCount = 64;
|
||||
m_unrollStmts = 20;
|
||||
|
||||
|
@ -70,6 +70,7 @@ class V3Options {
|
||||
|
||||
int m_inlineMult; // main switch: --inline-mult
|
||||
int m_outputSplit; // main switch: --output-split
|
||||
int m_outputSplitCFuncs;// main switch: --output-split-cfuncs
|
||||
int m_unrollCount; // main switch: --unroll-count
|
||||
int m_unrollStmts; // main switch: --unroll-stmts
|
||||
|
||||
@ -152,6 +153,7 @@ class V3Options {
|
||||
|
||||
int inlineMult() const { return m_inlineMult; }
|
||||
int outputSplit() const { return m_outputSplit; }
|
||||
int outputSplitCFuncs() const { return m_outputSplitCFuncs; }
|
||||
int unrollCount() const { return m_unrollCount; }
|
||||
int unrollStmts() const { return m_unrollStmts; }
|
||||
|
||||
|
@ -271,6 +271,7 @@ private:
|
||||
vector<OrderLoopEndVertex*> m_pmlLoopEndps; // processInsLoop: End vertex for each color
|
||||
vector<OrderLoopBeginVertex*> m_pomLoopMoveps;// processMoveLoop: Loops next nodes are under
|
||||
AstCFunc* m_pomNewFuncp; // Current function being created
|
||||
int m_pomNewStmts; // Statements in function being created
|
||||
V3Graph m_pomGraph; // Graph of logic elements to move
|
||||
V3List<OrderMoveVertex*> m_pomWaiting; // List of nodes needing inputs to become ready
|
||||
protected:
|
||||
@ -688,6 +689,7 @@ public:
|
||||
m_settleVxp = NULL;
|
||||
m_inputsVxp = NULL;
|
||||
m_loopIdMax = LOOPID_FIRST;
|
||||
m_pomNewStmts = 0;
|
||||
if (debug()) m_graph.debug(5); // 3 is default if global debug; we want acyc debugging
|
||||
}
|
||||
virtual ~OrderVisitor() {
|
||||
@ -1374,8 +1376,10 @@ void OrderVisitor::processMoveOne(OrderMoveVertex* vertexp, OrderMoveDomScope* d
|
||||
}
|
||||
else { // Normal logic
|
||||
// Make or borrow a CFunc to contain the new statements
|
||||
if (v3Global.opt.profileCFuncs()) {
|
||||
// Put every statement into a unique function to ease profiling
|
||||
if (v3Global.opt.profileCFuncs()
|
||||
|| (v3Global.opt.outputSplitCFuncs()
|
||||
&& v3Global.opt.outputSplitCFuncs() < m_pomNewStmts)) {
|
||||
// Put every statement into a unique function to ease profiling or reduce function size
|
||||
m_pomNewFuncp = NULL;
|
||||
}
|
||||
if (!m_pomNewFuncp && domainp != m_deleteDomainp) {
|
||||
@ -1383,6 +1387,7 @@ void OrderVisitor::processMoveOne(OrderMoveVertex* vertexp, OrderMoveDomScope* d
|
||||
m_pomNewFuncp = new AstCFunc(nodep->fileline(), name, scopep);
|
||||
m_pomNewFuncp->argTypes(EmitCBaseVisitor::symClassVar());
|
||||
m_pomNewFuncp->symProlog(true);
|
||||
m_pomNewStmts = 0;
|
||||
if (domainp->hasInitial() || domainp->hasSettle()) m_pomNewFuncp->slow(true);
|
||||
scopep->addActivep(m_pomNewFuncp);
|
||||
// Where will we be adding the call?
|
||||
@ -1402,6 +1407,11 @@ void OrderVisitor::processMoveOne(OrderMoveVertex* vertexp, OrderMoveDomScope* d
|
||||
pushDeletep(nodep); nodep=NULL;
|
||||
} else {
|
||||
m_pomNewFuncp->addStmtsp(nodep);
|
||||
if (v3Global.opt.outputSplitCFuncs()) {
|
||||
// Add in the number of nodes we're adding
|
||||
EmitCBaseCounterVisitor visitor(nodep);
|
||||
m_pomNewStmts += visitor.count();
|
||||
}
|
||||
}
|
||||
}
|
||||
processMoveDoneOne (vertexp);
|
||||
|
Loading…
Reference in New Issue
Block a user