mirror of
https://github.com/verilator/verilator.git
synced 2025-02-01 11:14:03 +00:00
338 lines
14 KiB
C++
338 lines
14 KiB
C++
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
|
//*************************************************************************
|
|
// DESCRIPTION: Verilator: Generic optimizations on a per function basis
|
|
//
|
|
// Code available from: https://verilator.org
|
|
//
|
|
//*************************************************************************
|
|
//
|
|
// Copyright 2003-2025 by Wilson Snyder. This program is free software; you
|
|
// can redistribute it and/or modify it under the terms of either the GNU
|
|
// Lesser General Public License Version 3 or the Perl Artistic License
|
|
// Version 2.0.
|
|
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
|
//
|
|
//*************************************************************************
|
|
//
|
|
// - Split assignments to wide locations with Concat on the RHS
|
|
// at word boundaries:
|
|
// foo = {l, r};
|
|
// becomes (recursively):
|
|
// foo[_:_] = r;
|
|
// foo[_:_] = l;
|
|
//
|
|
// - Balance concatenation trees, e.g.:
|
|
// {a, {b, {c, d}}
|
|
// becomes:
|
|
// {{a, b}, {c, d}}
|
|
// Reality is more complex here, see the code.
|
|
//
|
|
//*************************************************************************
|
|
|
|
#include "V3PchAstMT.h"
|
|
|
|
#include "V3FuncOpt.h"
|
|
|
|
#include "V3Global.h"
|
|
#include "V3Stats.h"
|
|
#include "V3ThreadPool.h"
|
|
|
|
VL_DEFINE_DEBUG_FUNCTIONS;
|
|
|
|
class BalanceConcatTree final {
|
|
// STATELESS
|
|
|
|
// We keep the expressions, together with their offsets within a concatenation tree
|
|
struct Term final {
|
|
AstNodeExpr* exprp = nullptr;
|
|
size_t offset = 0;
|
|
|
|
Term() = default;
|
|
Term(AstNodeExpr* exprp, size_t offset)
|
|
: exprp{exprp}
|
|
, offset{offset} {}
|
|
};
|
|
|
|
// Recursive implementation of 'gatherTerms' below.
|
|
static void gatherTermsRecursive(AstNodeExpr* exprp, std::vector<AstNodeExpr*>& terms) {
|
|
if (AstConcat* const catp = VN_CAST(exprp, Concat)) {
|
|
// Recursive case: gather sub terms, right to left
|
|
gatherTermsRecursive(catp->rhsp(), terms);
|
|
gatherTermsRecursive(catp->lhsp(), terms);
|
|
return;
|
|
}
|
|
|
|
// Base case: different operation
|
|
terms.emplace_back(exprp);
|
|
}
|
|
|
|
// Gather terms in the tree rooted at the given node.
|
|
// Results are right to left, that is, index 0 in the returned vector
|
|
// is the rightmost term, index size()-1 is the leftmost term.
|
|
static std::vector<AstNodeExpr*> gatherTerms(AstConcat* rootp) {
|
|
std::vector<AstNodeExpr*> terms;
|
|
gatherTermsRecursive(rootp->rhsp(), terms);
|
|
gatherTermsRecursive(rootp->lhsp(), terms);
|
|
return terms;
|
|
}
|
|
|
|
// Construct a balanced concatenation from the given terms,
|
|
// between indices begin (inclusive), and end (exclusive).
|
|
// Note term[end].offset must be valid. term[end].vtxp is
|
|
// never referenced.
|
|
static AstNodeExpr* construct(const std::vector<Term>& terms, const size_t begin,
|
|
const size_t end) {
|
|
UASSERT(end < terms.size(), "Invalid end");
|
|
UASSERT(begin < end, "Invalid range");
|
|
// Base case: just return the term
|
|
if (end == begin + 1) return terms[begin].exprp;
|
|
|
|
// Recursive case:
|
|
// Compute the mid-point, trying to create roughly equal width intermediates
|
|
const size_t width = terms[end].offset - terms[begin].offset;
|
|
const size_t midOffset = width / 2 + terms[begin].offset;
|
|
const auto beginIt = terms.begin() + begin;
|
|
const auto endIt = terms.begin() + end;
|
|
const auto midIt = std::lower_bound(beginIt + 1, endIt - 1, midOffset, //
|
|
[&](const Term& term, size_t value) { //
|
|
return term.offset < value;
|
|
});
|
|
const size_t mid = begin + std::distance(beginIt, midIt);
|
|
UASSERT(begin < mid && mid < end, "Must make some progress");
|
|
// Construct the subtrees
|
|
AstNodeExpr* const rhsp = construct(terms, begin, mid);
|
|
AstNodeExpr* const lhsp = construct(terms, mid, end);
|
|
// Construct new node
|
|
AstNodeExpr* newp = new AstConcat{lhsp->fileline(), lhsp, rhsp};
|
|
newp->user1(true); // Must not attempt to balance again.
|
|
return newp;
|
|
}
|
|
|
|
// Returns replacement node, or nullptr if no change
|
|
static AstConcat* balance(AstConcat* const rootp) {
|
|
UINFO(9, "balanceConcat " << rootp << "\n");
|
|
// Gather all input vertices of the tree
|
|
const std::vector<AstNodeExpr*> exprps = gatherTerms(rootp);
|
|
// Don't bother with trivial trees
|
|
if (exprps.size() <= 3) return nullptr;
|
|
// Don't do it if any of the terms are impure
|
|
for (AstNodeExpr* const exprp : exprps) {
|
|
if (!exprp->isPure()) return nullptr;
|
|
}
|
|
|
|
// Construct the terms Vector that we are going to do processing on
|
|
std::vector<Term> terms(exprps.size() + 1);
|
|
// These are redundant (constructor does the same), but here they are for clarity
|
|
terms[0].offset = 0;
|
|
terms[exprps.size()].exprp = nullptr;
|
|
for (size_t i = 0; i < exprps.size(); ++i) {
|
|
terms[i].exprp = exprps[i]->unlinkFrBack();
|
|
terms[i + 1].offset = terms[i].offset + exprps[i]->width();
|
|
}
|
|
|
|
// Round 1: try to create terms ending on VL_EDATASIZE boundaries.
|
|
// This ensures we pack bits within a VL_EDATASIZE first is possible,
|
|
// and then hopefully we can just assemble VL_EDATASIZE words afterward.
|
|
std::vector<Term> terms2;
|
|
{
|
|
terms2.reserve(terms.size());
|
|
|
|
size_t begin = 0; // Start of current range considered
|
|
size_t end = 0; // End of current range considered
|
|
size_t offset = 0; // Offset of current range considered
|
|
|
|
// Create a term from the current range
|
|
const auto makeTerm = [&]() {
|
|
AstNodeExpr* const exprp = construct(terms, begin, end);
|
|
terms2.emplace_back(exprp, offset);
|
|
offset += exprp->width();
|
|
begin = end;
|
|
};
|
|
|
|
// Create all terms ending on a boundary.
|
|
while (++end < terms.size() - 1) {
|
|
if (terms[end].offset % VL_EDATASIZE == 0) makeTerm();
|
|
}
|
|
// Final term. Loop condition above ensures this always exists,
|
|
// and might or might not be on a boundary.
|
|
makeTerm();
|
|
// Sentinel term
|
|
terms2.emplace_back(nullptr, offset);
|
|
// should have ended up with the same number of bits at least...
|
|
UASSERT(terms2.back().offset == terms.back().offset, "Inconsitent terms");
|
|
}
|
|
|
|
// Round 2: Combine the partial terms
|
|
return VN_AS(construct(terms2, 0, terms2.size() - 1), Concat);
|
|
}
|
|
|
|
public:
|
|
static AstConcat* apply(AstConcat* rootp) { return balance(rootp); }
|
|
};
|
|
|
|
class FuncOptVisitor final : public VNVisitor {
|
|
// NODE STATE
|
|
// AstNodeAssign::user() -> bool. Already checked, safe to split. Omit expensive check.
|
|
// AstConcat::user() -> bool. Already balanced.
|
|
|
|
// STATE - Statistic tracking
|
|
VDouble0 m_balancedConcats; // Number of concatenations balanced
|
|
VDouble0 m_concatSplits; // Number of splits in assignments with Concat on RHS
|
|
|
|
// True for e.g.: foo = foo >> 1; or foo[foo[0]] = ...;
|
|
static bool readsLhs(AstNodeAssign* nodep) {
|
|
// It is expected that the number of vars written on the LHS is very small (should be 1).
|
|
std::unordered_set<const AstVar*> lhsWrVarps;
|
|
std::unordered_set<const AstVar*> lhsRdVarps;
|
|
nodep->lhsp()->foreach([&](const AstVarRef* refp) {
|
|
if (refp->access().isWriteOrRW()) lhsWrVarps.emplace(refp->varp());
|
|
if (refp->access().isReadOrRW()) lhsRdVarps.emplace(refp->varp());
|
|
});
|
|
|
|
// Common case of 1 variable on the LHS - special handling for speed
|
|
if (lhsWrVarps.size() == 1) {
|
|
const AstVar* const lhsWrVarp = *lhsWrVarps.begin();
|
|
// Check Rhs doesn't read the written var
|
|
const bool rhsReadsWritten = nodep->rhsp()->exists([=](const AstVarRef* refp) { //
|
|
return refp->varp() == lhsWrVarp;
|
|
});
|
|
if (rhsReadsWritten) return true;
|
|
// Check Lhs doesn't read the written var
|
|
return lhsRdVarps.count(lhsWrVarp);
|
|
}
|
|
|
|
// Generic case of multiple vars written on LHS
|
|
// TODO: this might be impossible due to earlier transforms, not sure
|
|
// Check Rhs doesn't read the written vars
|
|
const bool rhsReadsWritten = nodep->rhsp()->exists([&](const AstVarRef* refp) { //
|
|
return lhsWrVarps.count(refp->varp());
|
|
});
|
|
if (rhsReadsWritten) return true;
|
|
// Check Lhs doesn't read the written vars
|
|
for (const AstVar* const lhsWrVarp : lhsWrVarps) {
|
|
if (lhsRdVarps.count(lhsWrVarp)) return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// METHODS
|
|
// Split wide assignments with a wide concatenation on the RHS.
|
|
// Returns true if 'nodep' was deleted
|
|
bool splitConcat(AstNodeAssign* nodep) {
|
|
UINFO(9, "splitConcat " << nodep << "\n");
|
|
// Only care about concatenations on the right
|
|
AstConcat* const rhsp = VN_CAST(nodep->rhsp(), Concat);
|
|
if (!rhsp) return false;
|
|
// Will need the LHS
|
|
AstNodeExpr* lhsp = nodep->lhsp();
|
|
UASSERT_OBJ(lhsp->width() == rhsp->width(), nodep, "Inconsistent assignment");
|
|
// Only consider pure assignments. Nodes inserted below are safe.
|
|
if (!nodep->user1() && (!lhsp->isPure() || !rhsp->isPure())) return false;
|
|
// Check for a Sel on the LHS if present, and skip over it
|
|
uint32_t lsb = 0;
|
|
if (AstSel* const selp = VN_CAST(lhsp, Sel)) {
|
|
if (AstConst* const lsbp = VN_CAST(selp->lsbp(), Const)) {
|
|
lhsp = selp->fromp();
|
|
lsb = lsbp->toUInt();
|
|
} else {
|
|
// Don't optimize if it's a variable select
|
|
return false;
|
|
}
|
|
}
|
|
// No need to split assignments targeting storage smaller than a machine register
|
|
if (lhsp->width() <= VL_QUADSIZE) return false;
|
|
|
|
// If it's a concat straddling a word boundary, try to split it.
|
|
// The next visit on the new nodes will split it recursively.
|
|
// Otherwise, keep the original assignment.
|
|
const int lsbWord = lsb / VL_EDATASIZE;
|
|
const int msbWord = (lsb + rhsp->width() - 1) / VL_EDATASIZE;
|
|
if (lsbWord == msbWord) return false;
|
|
|
|
// If the RHS reads the LHS, we can't actually do this. Nodes inserted below are safe.
|
|
if (!nodep->user1() && readsLhs(nodep)) return false;
|
|
|
|
// Ok, actually split it now
|
|
UINFO(5, "splitConcat optimizing " << nodep << "\n");
|
|
++m_concatSplits;
|
|
// The 2 parts and their offsets
|
|
AstNodeExpr* const rrp = rhsp->rhsp()->unlinkFrBack();
|
|
AstNodeExpr* const rlp = rhsp->lhsp()->unlinkFrBack();
|
|
const int rLsb = lsb;
|
|
const int lLsb = lsb + rrp->width();
|
|
// Insert the 2 assignment right after the original. They will be visited next.
|
|
AstAssign* const arp = new AstAssign{
|
|
nodep->fileline(),
|
|
new AstSel{lhsp->fileline(), lhsp->cloneTreePure(false), rLsb, rrp->width()}, rrp};
|
|
AstAssign* const alp = new AstAssign{
|
|
nodep->fileline(),
|
|
new AstSel{lhsp->fileline(), lhsp->unlinkFrBack(), lLsb, rlp->width()}, rlp};
|
|
nodep->addNextHere(arp);
|
|
arp->addNextHere(alp);
|
|
// Safe to split these.
|
|
arp->user1(true);
|
|
alp->user1(true);
|
|
// Nuke what is left
|
|
VL_DO_DANGLING(pushDeletep(nodep->unlinkFrBack()), nodep);
|
|
return true;
|
|
}
|
|
|
|
// VISIT
|
|
void visit(AstNodeAssign* nodep) override {
|
|
// TODO: Only thing remaining inside functions should be AstAssign (that is, an actual
|
|
// assignment statemant), but we stil use AstAssignW, AstAssignDly, and all, fix.
|
|
iterateChildren(nodep);
|
|
|
|
if (v3Global.opt.fFuncSplitCat()) {
|
|
if (splitConcat(nodep)) return; // Must return here, in case more code is added below
|
|
}
|
|
}
|
|
|
|
void visit(AstConcat* nodep) override {
|
|
if (v3Global.opt.fFuncBalanceCat() && !nodep->user1() && !VN_IS(nodep->backp(), Concat)) {
|
|
if (AstConcat* const newp = BalanceConcatTree::apply(nodep)) {
|
|
UINFO(5, "balanceConcat optimizing " << nodep << "\n");
|
|
++m_balancedConcats;
|
|
nodep->replaceWith(newp);
|
|
VL_DO_DANGLING(pushDeletep(nodep), nodep);
|
|
newp->user1(true); // Must not attempt again.
|
|
// Return here. The new node will be iterated next.
|
|
return;
|
|
}
|
|
}
|
|
iterateChildren(nodep);
|
|
}
|
|
|
|
void visit(AstNode* nodep) override { iterateChildren(nodep); }
|
|
|
|
// CONSTRUCTORS
|
|
explicit FuncOptVisitor(AstCFunc* funcp) { iterateChildren(funcp); }
|
|
~FuncOptVisitor() override {
|
|
V3Stats::addStatSum("Optimizations, FuncOpt concat trees balanced", m_balancedConcats);
|
|
V3Stats::addStatSum("Optimizations, FuncOpt concat splits", m_concatSplits);
|
|
}
|
|
|
|
public:
|
|
static void apply(AstCFunc* funcp) { FuncOptVisitor{funcp}; }
|
|
};
|
|
|
|
//######################################################################
|
|
|
|
void V3FuncOpt::funcOptAll(AstNetlist* nodep) {
|
|
UINFO(2, __FUNCTION__ << ": " << endl);
|
|
{
|
|
const VNUser1InUse user1InUse;
|
|
V3ThreadScope threadScope;
|
|
for (AstNodeModule *modp = nodep->modulesp(), *nextModp; modp; modp = nextModp) {
|
|
nextModp = VN_AS(modp->nextp(), NodeModule);
|
|
for (AstNode *nodep = modp->stmtsp(), *nextNodep; nodep; nodep = nextNodep) {
|
|
nextNodep = nodep->nextp();
|
|
if (AstCFunc* const cfuncp = VN_CAST(nodep, CFunc)) {
|
|
threadScope.enqueue([cfuncp]() { FuncOptVisitor::apply(cfuncp); });
|
|
}
|
|
}
|
|
}
|
|
}
|
|
V3Global::dumpCheckGlobalTree("funcopt", 0, dumpTreeEitherLevel() >= 3);
|
|
}
|