mirror of
https://github.com/verilator/verilator.git
synced 2025-04-29 12:06:54 +00:00
Internals: Add V3InstrCount, for threads branch.
This commit is contained in:
parent
2f18a52118
commit
81ef9b5dd2
@ -199,6 +199,7 @@ RAW_OBJS = \
|
||||
V3Hashed.o \
|
||||
V3Inline.o \
|
||||
V3Inst.o \
|
||||
V3InstrCount.o \
|
||||
V3Life.o \
|
||||
V3LifePost.o \
|
||||
V3LinkCells.o \
|
||||
|
255
src/V3InstrCount.cpp
Normal file
255
src/V3InstrCount.cpp
Normal file
@ -0,0 +1,255 @@
|
||||
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
||||
//*************************************************************************
|
||||
// DESCRIPTION: Verilator: Estimate instruction count to run the logic
|
||||
// we would generate for any given AST subtree.
|
||||
//
|
||||
// Code available from: http://www.veripool.org/verilator
|
||||
//
|
||||
//*************************************************************************
|
||||
//
|
||||
// Copyright 2003-2018 by Wilson Snyder. This program is free software; you can
|
||||
// redistribute it and/or modify it under the terms of either the GNU
|
||||
// Lesser General Public License Version 3 or the Perl Artistic License
|
||||
// Version 2.0.
|
||||
//
|
||||
// Verilator is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
//*************************************************************************
|
||||
|
||||
#include "config_build.h"
|
||||
#include "verilatedos.h"
|
||||
|
||||
#include "V3Ast.h"
|
||||
#include "V3InstrCount.h"
|
||||
|
||||
/// Estimate the instruction cost for executing all logic within and below
|
||||
/// a given AST node. Note this estimates the number of instructions we'll
|
||||
/// execute, not the number we'll generate. That is, for conditionals,
|
||||
/// we'll count instructions from either the 'if' or the 'else' branch,
|
||||
/// whichever is larger. We know we won't run both.
|
||||
|
||||
class InstrCountVisitor : public AstNVisitor {
|
||||
private:
|
||||
// MEMBERS
|
||||
uint32_t m_instrCount; // Running count of instructions
|
||||
const AstNode* m_startNodep; // Start node of count
|
||||
bool m_tracingCall; // Iterating into a CCall to a CFunc
|
||||
bool m_inCFunc; // Inside AstCFunc
|
||||
bool m_assertNoDups; // Check for duplicates
|
||||
int m_debug; // Debug level, with possible override
|
||||
unsigned m_debugDepth; // Current tree depth for debug indent
|
||||
|
||||
// TYPES
|
||||
// Little class to cleanly call startVisitBase/endVisitBase
|
||||
class VisitBase {
|
||||
private:
|
||||
// MEMBERS
|
||||
uint32_t m_savedCount;
|
||||
AstNode* m_nodep;
|
||||
InstrCountVisitor* m_visitor;
|
||||
public:
|
||||
// CONSTRUCTORS
|
||||
VisitBase(InstrCountVisitor* visitor, AstNode* nodep)
|
||||
: m_nodep(nodep), m_visitor(visitor) {
|
||||
m_savedCount = m_visitor->startVisitBase(nodep);
|
||||
}
|
||||
~VisitBase() {
|
||||
m_visitor->endVisitBase(m_savedCount, m_nodep);
|
||||
}
|
||||
private:
|
||||
VL_UNCOPYABLE(VisitBase);
|
||||
};
|
||||
|
||||
public:
|
||||
// CONSTRUCTORS
|
||||
InstrCountVisitor(AstNode* nodep, bool assertNoDups, int forceDebug)
|
||||
: m_instrCount(0),
|
||||
m_startNodep(nodep),
|
||||
m_tracingCall(false),
|
||||
m_inCFunc(false),
|
||||
m_assertNoDups(assertNoDups),
|
||||
m_debugDepth(0) {
|
||||
m_debug = std::max(forceDebug, v3Global.opt.debugSrcLevel(__FILE__));
|
||||
if (nodep) iterate(nodep);
|
||||
}
|
||||
virtual ~InstrCountVisitor() {}
|
||||
|
||||
// METHODS
|
||||
uint32_t instrCount() const { return m_instrCount; }
|
||||
|
||||
private:
|
||||
int debug() const { return m_debug; }
|
||||
string indent() { return string(m_debugDepth, ' ')+cvtToStr(m_debugDepth)+"> "; }
|
||||
|
||||
uint32_t startVisitBase(AstNode* nodep) {
|
||||
if (m_assertNoDups && !m_inCFunc) {
|
||||
// Ensure we don't count the same node twice
|
||||
//
|
||||
// We only enable this assert for the initial LogicMTask counts
|
||||
// in V3Order. We can't enable it for the 2nd pass in V3EmitC,
|
||||
// as we expect mtasks to contain common logic after V3Combine,
|
||||
// so this would fail.
|
||||
//
|
||||
// Also, we expect some collisions within calls to CFuncs
|
||||
// (which at the V3Order stage represent verilog tasks, not to
|
||||
// the CFuncs that V3Order will generate.) So don't check for
|
||||
// collisions in CFuncs.
|
||||
if (nodep->user5p()) {
|
||||
nodep->v3fatalSrc("Node originally inserted below logic vertex "
|
||||
<<static_cast<AstNode*>(nodep->user5p()));
|
||||
}
|
||||
nodep->user5p(const_cast<void*>(reinterpret_cast<const void*>(m_startNodep)));
|
||||
}
|
||||
|
||||
// Save the count, and add it back in during ~VisitBase This allows
|
||||
// debug prints to show local cost of each subtree, so we can see a
|
||||
// hierarchical view of the cost when in debug mode.
|
||||
++m_debugDepth;
|
||||
uint32_t savedCount = m_instrCount;
|
||||
m_instrCount = nodep->instrCount();
|
||||
return savedCount;
|
||||
}
|
||||
void endVisitBase(uint32_t savedCount, AstNode* nodep) {
|
||||
UINFO(8, indent()<<"cost "<<m_instrCount<<" "<<nodep<<endl);
|
||||
--m_debugDepth;
|
||||
m_instrCount += savedCount;
|
||||
}
|
||||
|
||||
// VISITORS
|
||||
virtual void visit(AstNodeSel* nodep) {
|
||||
// This covers both AstArraySel and AstWordSel
|
||||
//
|
||||
// If some vector is a bazillion dwords long, and we're selecting 1
|
||||
// dword to read or write from it, our cost should be small.
|
||||
//
|
||||
// Hence, exclude the child of the AstWordSel from the computation,
|
||||
// whose cost scales with the size of the entire (maybe large) vector.
|
||||
VisitBase vb(this, nodep);
|
||||
iterateAndNextNull(nodep->bitp());
|
||||
}
|
||||
virtual void visit(AstSel* nodep) {
|
||||
// Similar to AstNodeSel above, a small select into a large vector
|
||||
// is not expensive. Count the cost of the AstSel itself (scales with
|
||||
// its width) and the cost of the lsbp() and widthp() nodes, but not
|
||||
// the fromp() node which could be disproportionately large.
|
||||
VisitBase vb(this, nodep);
|
||||
iterateAndNextNull(nodep->lsbp());
|
||||
iterateAndNextNull(nodep->widthp());
|
||||
}
|
||||
virtual void visit(AstSliceSel* nodep) {
|
||||
nodep->v3fatalSrc("AstSliceSel unhandled");
|
||||
}
|
||||
virtual void visit(AstMemberSel* nodep) {
|
||||
nodep->v3fatalSrc("AstMemberSel unhandled");
|
||||
}
|
||||
virtual void visit(AstConcat* nodep) {
|
||||
// Nop.
|
||||
//
|
||||
// Ignore concat. The problem with counting concat is that when we
|
||||
// have many things concatted together, it's not a single
|
||||
// operation, but this:
|
||||
//
|
||||
// concat(a, concat(b, concat(c, concat(d, ... ))))
|
||||
//
|
||||
// Then if we account a cost to each 'concat' that scales with its
|
||||
// width, this whole operation ends up with a cost accounting that
|
||||
// scales with N^2. Of course, the real operation isn't that
|
||||
// expensive: we won't copy each element over and over, we'll just
|
||||
// copy it once from its origin into its destination, so the actual
|
||||
// cost is linear with the size of the data. We don't need to count
|
||||
// the concat at all to reflect a linear cost; it's already there
|
||||
// in the width of the destination (which we count) and the sum of
|
||||
// the widths of the operands (ignored here).
|
||||
}
|
||||
virtual void visit(AstNodeIf* nodep) {
|
||||
VisitBase vb(this, nodep);
|
||||
iterateAndNextNull(nodep->condp());
|
||||
uint32_t savedCount = m_instrCount;
|
||||
|
||||
UINFO(8, indent()<<"ifsp:\n");
|
||||
m_instrCount = 0;
|
||||
iterateAndNextNull(nodep->ifsp());
|
||||
uint32_t ifCount = m_instrCount;
|
||||
|
||||
UINFO(8, indent()<<"elsesp:\n");
|
||||
m_instrCount = 0;
|
||||
iterateAndNextNull(nodep->elsesp());
|
||||
uint32_t elseCount = m_instrCount;
|
||||
|
||||
m_instrCount = savedCount + std::max(ifCount, elseCount);
|
||||
}
|
||||
virtual void visit(AstNodeCond* nodep) {
|
||||
// Just like if/else above, the ternary operator only evaluates
|
||||
// one of the two expressions, so only count the max.
|
||||
VisitBase vb(this, nodep);
|
||||
iterateAndNextNull(nodep->condp());
|
||||
uint32_t savedCount = m_instrCount;
|
||||
|
||||
UINFO(8, indent()<<"?\n");
|
||||
m_instrCount = 0;
|
||||
iterateAndNextNull(nodep->expr1p());
|
||||
uint32_t ifCount = m_instrCount;
|
||||
|
||||
UINFO(8, indent()<<":\n");
|
||||
m_instrCount = 0;
|
||||
iterateAndNextNull(nodep->expr2p());
|
||||
uint32_t elseCount = m_instrCount;
|
||||
|
||||
m_instrCount = savedCount + std::max(ifCount, elseCount);
|
||||
}
|
||||
virtual void visit(AstActive* nodep) {
|
||||
// You'd think that the OrderLogicVertex's would be disjoint trees
|
||||
// of stuff in the AST, but it isn't so: V3Order makes an
|
||||
// OrderLogicVertex for each ACTIVE, and then also makes an
|
||||
// OrderLogicVertex for each statement within the ACTIVE.
|
||||
//
|
||||
// To avoid double-counting costs, stop recursing and short-circuit
|
||||
// the computation for each ACTIVE.
|
||||
//
|
||||
// Our intent is that this only stops at the root node of the
|
||||
// search; there should be no actives beneath the root, as there
|
||||
// are no actives-under-actives. In any case, check that we're at
|
||||
// root:
|
||||
if (nodep != m_startNodep) {
|
||||
nodep->v3fatalSrc("Multiple actives, or not start node");
|
||||
}
|
||||
}
|
||||
virtual void visit(AstCCall* nodep) {
|
||||
VisitBase vb(this, nodep);
|
||||
iterateChildren(nodep);
|
||||
m_tracingCall = true;
|
||||
iterate(nodep->funcp());
|
||||
if (m_tracingCall) {
|
||||
nodep->v3fatalSrc("visit(AstCFunc) should have cleared m_tracingCall.");
|
||||
}
|
||||
}
|
||||
virtual void visit(AstCFunc* nodep) {
|
||||
// Don't count a CFunc other than by tracing a call or counting it
|
||||
// from the root
|
||||
if (!m_tracingCall && (nodep != m_startNodep)) {
|
||||
nodep->v3fatalSrc("AstCFunc not under AstCCall, or not start node");
|
||||
}
|
||||
m_tracingCall = false;
|
||||
bool saved_inCFunc = m_inCFunc;
|
||||
m_inCFunc = true;
|
||||
{
|
||||
VisitBase vb(this, nodep);
|
||||
iterateChildren(nodep);
|
||||
}
|
||||
m_inCFunc = saved_inCFunc;
|
||||
}
|
||||
virtual void visit(AstNode* nodep) {
|
||||
VisitBase vb(this, nodep);
|
||||
iterateChildren(nodep);
|
||||
}
|
||||
|
||||
VL_UNCOPYABLE(InstrCountVisitor);
|
||||
};
|
||||
|
||||
// Public entry point: the visitor performs the whole walk from its
// constructor, so a temporary is all that is needed to obtain the total.
uint32_t V3InstrCount::count(AstNode* nodep, bool assertNoDups, int forceDebug) {
    return InstrCountVisitor(nodep, assertNoDups, forceDebug).instrCount();
}
|
42
src/V3InstrCount.h
Normal file
42
src/V3InstrCount.h
Normal file
@ -0,0 +1,42 @@
|
||||
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
||||
//*************************************************************************
|
||||
// DESCRIPTION: Verilator: Estimate instruction count to run the logic
|
||||
// we would generate for any given AST subtree.
|
||||
//
|
||||
// Code available from: http://www.veripool.org/verilator
|
||||
//
|
||||
//*************************************************************************
|
||||
//
|
||||
// Copyright 2003-2018 by Wilson Snyder. This program is free software; you can
|
||||
// redistribute it and/or modify it under the terms of either the GNU
|
||||
// Lesser General Public License Version 3 or the Perl Artistic License
|
||||
// Version 2.0.
|
||||
//
|
||||
// Verilator is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
//*************************************************************************
|
||||
|
||||
#include "config_build.h"
|
||||
#include "verilatedos.h"
|
||||
|
||||
class AstNode;
|
||||
|
||||
// Include guard: without it, a translation unit that pulls in this header
// more than once would redefine V3InstrCount.  The guarded region repeats
// the AstNode forward declaration (legal, and harmless if already seen) so
// that it stands on its own.
#ifndef VERILATOR_V3INSTRCOUNT_H_
#define VERILATOR_V3INSTRCOUNT_H_

class AstNode;

class V3InstrCount {
public:
    // Return the estimated number of instructions we would execute while
    // running the code at and beneath nodep.
    //
    // The estimate is rough: the path taken through conditionals in
    // nodep is unknown, so the longest path is assumed.
    //
    // If nodep is an AstActive, returns 0.
    // If nodep contains nested AstActives, raises an error.
    //
    // If assertNoDups is true, user5 is marked on each AstNode scanned,
    // and an error is raised if the same node is seen a second time
    // (potentially across more than one call to count).
    //
    // forceDebug may raise the debug level used for this count; the
    // default of 0 leaves the configured level in effect.
    static uint32_t count(AstNode* nodep, bool assertNoDups, int forceDebug = 0);
};

#endif  // VERILATOR_V3INSTRCOUNT_H_
|
Loading…
Reference in New Issue
Block a user