Internals: Add V3InstrCount, for threads branch.

2025-04-29 12:06:54 +00:00 · 2018-07-04 21:52:15 -04:00 · 2018-07-04 21:52:15 -04:00 · 81ef9b5dd2
commit 81ef9b5dd2
parent 2f18a52118
3 changed files with 298 additions and 0 deletions
--- a/src/Makefile_obj.in
+++ b/src/Makefile_obj.in
@ -199,6 +199,7 @@ RAW_OBJS = \
 	V3Hashed.o \
 	V3Inline.o \
 	V3Inst.o \
+	V3InstrCount.o \
 	V3Life.o \
 	V3LifePost.o \
 	V3LinkCells.o \
--- a/src/V3InstrCount.cpp
+++ b/src/V3InstrCount.cpp
@ -0,0 +1,255 @@
+// -*- mode: C++; c-file-style: "cc-mode" -*-
+//*************************************************************************
+// DESCRIPTION: Verilator: Estimate instruction count to run the logic
+//                         we would generate for any given AST subtree.
+//
+// Code available from: http://www.veripool.org/verilator
+//
+//*************************************************************************
+//
+// Copyright 2003-2018 by Wilson Snyder.  This program is free software; you can
+// redistribute it and/or modify it under the terms of either the GNU
+// Lesser General Public License Version 3 or the Perl Artistic License
+// Version 2.0.
+//
+// Verilator is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+//*************************************************************************
+
+#include "config_build.h"
+#include "verilatedos.h"
+
+#include "V3Ast.h"
+#include "V3InstrCount.h"
+
+/// Estimate the instruction cost for executing all logic within and below
+/// a given AST node. Note this estimates the number of instructions we'll
+/// execute, not the number we'll generate. That is, for conditionals,
+/// we'll count instructions from either the 'if' or the 'else' branch,
+/// whichever is larger. We know we won't run both.
+
+class InstrCountVisitor : public AstNVisitor {
+private:
+    // MEMBERS
+    uint32_t m_instrCount;  // Running count of instructions
+    const AstNode* m_startNodep;  // Start node of count
+    bool m_tracingCall;  // Iterating into a CCall to a CFunc
+    bool m_inCFunc;  // Inside AstCFunc
+    bool m_assertNoDups;  // Check for duplicates
+    int m_debug;  // Debug level, with possible override
+    unsigned m_debugDepth;  // Current tree depth for debug indent
+
+    // TYPES
+    // Scoped helper: calls startVisitBase() on construction and
+    // endVisitBase() on destruction, so each visit() is bracketed cleanly.
+    class VisitBase {
+    private:
+        // MEMBERS
+        uint32_t m_savedCount;  // Outer count handed back by startVisitBase()
+        AstNode* m_nodep;  // Node whose visit this object brackets
+        InstrCountVisitor* m_visitor;  // Visitor whose count is saved/restored
+    public:
+        // CONSTRUCTORS
+        VisitBase(InstrCountVisitor* visitor, AstNode* nodep)
+            : m_nodep(nodep), m_visitor(visitor) {
+            m_savedCount = m_visitor->startVisitBase(nodep);
+        }
+        ~VisitBase() {
+            m_visitor->endVisitBase(m_savedCount, m_nodep);
+        }
+    private:
+        VL_UNCOPYABLE(VisitBase);
+    };
+
+public:
+    // CONSTRUCTORS
+    InstrCountVisitor(AstNode* nodep, bool assertNoDups, int forceDebug)
+        : m_instrCount(0),
+          m_startNodep(nodep),
+          m_tracingCall(false),
+          m_inCFunc(false),
+          m_assertNoDups(assertNoDups),
+          m_debugDepth(0) {
+        m_debug = std::max(forceDebug, v3Global.opt.debugSrcLevel(__FILE__));
+        if (nodep) iterate(nodep);  // The whole count is computed here, at construction
+    }
+    virtual ~InstrCountVisitor() {}
+
+    // METHODS
+    uint32_t instrCount() const { return m_instrCount; }  // Total from the constructor's traversal
+
+private:
+    int debug() const { return m_debug; }  // Level chosen in the constructor
+    string indent() { return string(m_debugDepth, ' ')+cvtToStr(m_debugDepth)+"> "; }  // Debug line prefix
+
+    uint32_t startVisitBase(AstNode* nodep) {
+        if (m_assertNoDups && !m_inCFunc) {
+            // Ensure we don't count the same node twice.
+            //
+            // We only enable this assert for the initial LogicMTask counts
+            // in V3Order. We can't enable it for the 2nd pass in V3EmitC,
+            // as we expect mtasks to contain common logic after V3Combine,
+            // so this would fail.
+            //
+            // Also, we expect some collisions within calls to CFuncs
+            // (which at the V3Order stage represent verilog tasks, not
+            // the CFuncs that V3Order will generate.) So don't check for
+            // collisions in CFuncs.
+            if (nodep->user5p()) {
+                nodep->v3fatalSrc("Node originally inserted below logic vertex "
+                                  <<static_cast<AstNode*>(nodep->user5p()));
+            }
+            nodep->user5p(const_cast<void*>(reinterpret_cast<const void*>(m_startNodep)));
+        }
+
+        // Save the count, and add it back in during ~VisitBase. This allows
+        // debug prints to show local cost of each subtree, so we can see a
+        // hierarchical view of the cost when in debug mode.
+        ++m_debugDepth;
+        uint32_t savedCount = m_instrCount;
+        m_instrCount = nodep->instrCount();  // Restart from this node's own cost
+        return savedCount;
+    }
+    void endVisitBase(uint32_t savedCount, AstNode* nodep) {
+        UINFO(8, indent()<<"cost "<<m_instrCount<<"  "<<nodep<<endl);
+        --m_debugDepth;
+        m_instrCount += savedCount;  // Fold this subtree's cost into the outer total
+    }
+
+    // VISITORS
+    virtual void visit(AstNodeSel* nodep) {
+        // This covers both AstArraySel and AstWordSel
+        //
+        // If some vector is a bazillion dwords long, and we're selecting 1
+        // dword to read or write from it, our cost should be small.
+        //
+        // Hence, only iterate bitp(); skip the other child of the select,
+        // whose cost scales with the size of the entire (maybe large) vector.
+        VisitBase vb(this, nodep);
+        iterateAndNextNull(nodep->bitp());
+    }
+    virtual void visit(AstSel* nodep) {
+        // Similar to AstNodeSel above, a small select into a large vector
+        // is not expensive. Count the cost of the AstSel itself (scales with
+        // its width) and the cost of the lsbp() and widthp() nodes, but not
+        // the fromp() node which could be disproportionately large.
+        VisitBase vb(this, nodep);
+        iterateAndNextNull(nodep->lsbp());
+        iterateAndNextNull(nodep->widthp());
+    }
+    virtual void visit(AstSliceSel* nodep) {
+        nodep->v3fatalSrc("AstSliceSel unhandled");  // No cost model for this node type
+    }
+    virtual void visit(AstMemberSel* nodep) {
+        nodep->v3fatalSrc("AstMemberSel unhandled");  // No cost model for this node type
+    }
+    virtual void visit(AstConcat* nodep) {
+        // Nop: neither the concat itself nor its operands are visited.
+        //
+        // Ignore concat. The problem with counting concat is that when we
+        // have many things concatted together, it's not a single
+        // operation, but this:
+        //
+        //  concat(a, concat(b, concat(c, concat(d, ... ))))
+        //
+        // Then if we account a cost to each 'concat' that scales with its
+        // width, this whole operation ends up with a cost accounting that
+        // scales with N^2. Of course, the real operation isn't that
+        // expensive: we won't copy each element over and over, we'll just
+        // copy it once from its origin into its destination, so the actual
+        // cost is linear with the size of the data. We don't need to count
+        // the concat at all to reflect a linear cost; it's already there
+        // in the width of the destination (which we count) and the sum of
+        // the widths of the operands (ignored here).
+    }
+    virtual void visit(AstNodeIf* nodep) {
+        VisitBase vb(this, nodep);
+        iterateAndNextNull(nodep->condp());
+        uint32_t savedCount = m_instrCount;  // Node's own cost plus the condition's cost
+
+        UINFO(8, indent()<<"ifsp:\n");
+        m_instrCount = 0;
+        iterateAndNextNull(nodep->ifsp());
+        uint32_t ifCount = m_instrCount;
+
+        UINFO(8, indent()<<"elsesp:\n");
+        m_instrCount = 0;
+        iterateAndNextNull(nodep->elsesp());
+        uint32_t elseCount = m_instrCount;
+
+        m_instrCount = savedCount + std::max(ifCount, elseCount);  // Charge only the costlier branch
+    }
+    virtual void visit(AstNodeCond* nodep) {
+        // Just like if/else above, the ternary operator only evaluates
+        // one of the two expressions, so only count the max.
+        VisitBase vb(this, nodep);
+        iterateAndNextNull(nodep->condp());
+        uint32_t savedCount = m_instrCount;  // Node's own cost plus the condition's cost
+
+        UINFO(8, indent()<<"?\n");
+        m_instrCount = 0;
+        iterateAndNextNull(nodep->expr1p());
+        uint32_t ifCount = m_instrCount;
+
+        UINFO(8, indent()<<":\n");
+        m_instrCount = 0;
+        iterateAndNextNull(nodep->expr2p());
+        uint32_t elseCount = m_instrCount;
+
+        m_instrCount = savedCount + std::max(ifCount, elseCount);  // Charge only the costlier operand
+    }
+    virtual void visit(AstActive* nodep) {
+        // You'd think that the OrderLogicVertex's would be disjoint trees
+        // of stuff in the AST, but it isn't so: V3Order makes an
+        // OrderLogicVertex for each ACTIVE, and then also makes an
+        // OrderLogicVertex for each statement within the ACTIVE.
+        //
+        // To avoid double-counting costs, stop recursing and short-circuit
+        // the computation for each ACTIVE (it contributes nothing here).
+        //
+        // Our intent is that this only stops at the root node of the
+        // search; there should be no actives beneath the root, as there
+        // are no actives-under-actives.  In any case, check that we're at
+        // root:
+        if (nodep != m_startNodep) {
+            nodep->v3fatalSrc("Multiple actives, or not start node");
+        }
+    }
+    virtual void visit(AstCCall* nodep) {
+        VisitBase vb(this, nodep);
+        iterateChildren(nodep);
+        m_tracingCall = true;  // Tell visit(AstCFunc) it is being reached through a call
+        iterate(nodep->funcp());
+        if (m_tracingCall) {
+            nodep->v3fatalSrc("visit(AstCFunc) should have cleared m_tracingCall.");
+        }
+    }
+    virtual void visit(AstCFunc* nodep) {
+        // Don't count a CFunc other than by tracing a call or counting it
+        // from the root
+        if (!m_tracingCall && (nodep != m_startNodep)) {
+            nodep->v3fatalSrc("AstCFunc not under AstCCall, or not start node");
+        }
+        m_tracingCall = false;  // Acknowledge the call; visit(AstCCall) checks this was cleared
+        bool saved_inCFunc = m_inCFunc;  // Restored after the body is counted
+        m_inCFunc = true;
+        {
+            VisitBase vb(this, nodep);
+            iterateChildren(nodep);
+        }
+        m_inCFunc = saved_inCFunc;
+    }
+    virtual void visit(AstNode* nodep) {  // Default: node's own cost plus all children
+        VisitBase vb(this, nodep);
+        iterateChildren(nodep);
+    }
+
+    VL_UNCOPYABLE(InstrCountVisitor);
+};
+
+uint32_t V3InstrCount::count(AstNode* nodep, bool assertNoDups, int forceDebug) {
+    // The visitor walks the tree in its constructor; just read back the total.
+    return InstrCountVisitor(nodep, assertNoDups, forceDebug).instrCount();
+}
--- a/src/V3InstrCount.h
+++ b/src/V3InstrCount.h
@ -0,0 +1,42 @@
+// -*- mode: C++; c-file-style: "cc-mode" -*-
+//*************************************************************************
+// DESCRIPTION: Verilator: Estimate instruction count to run the logic
+//                         we would generate for any given AST subtree.
+//
+// Code available from: http://www.veripool.org/verilator
+//
+//*************************************************************************
+//
+// Copyright 2003-2018 by Wilson Snyder.  This program is free software; you can
+// redistribute it and/or modify it under the terms of either the GNU
+// Lesser General Public License Version 3 or the Perl Artistic License
+// Version 2.0.
+//
+// Verilator is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+//*************************************************************************
+
+#include "config_build.h"
+#include "verilatedos.h"
+
+class AstNode;
+
+class V3InstrCount {
+public:
+    // Return the estimated count of instructions we'd incur while running
+    // code in and under nodep.  This is a rough estimate; we don't know
+    // what path we'll take through conditionals in nodep, so we assume we
+    // take the most expensive path.
+    //
+    // If nodep is an AstActive, returns 0.
+    // If nodep contains nested AstActives, raises an error.
+    //
+    // If assertNoDups is true, marks user5 on each AstNode scanned.  Then
+    // if we see the same node twice (across more than one call to count,
+    // potentially) raises an error.  Nodes reached inside a CFunc are exempt.
+    // forceDebug, if above this file's debug level, is used as the level.
+    static uint32_t count(AstNode* nodep, bool assertNoDups, int forceDebug = 0);
+};