mirror of
https://github.com/verilator/verilator.git
synced 2025-01-01 04:07:34 +00:00
Split up assignments to wides with Concat on the RHS (#5599)
Add a new pass to split up (recursively): foo = {l, r}; into the following, with the right indices, iff the concatenation straddles a wide word boundary. foo[_:_] = r; foo[_:_] = l; This eliminates more wide temporaries. Another 23% speedup on VeeR EH2 high_perf. Also brings the predicted stack size from 8M to 40k.
This commit is contained in:
parent
7f1aae640f
commit
77ef2cd487
@ -589,6 +589,10 @@ Summary:
|
||||
|
||||
.. option:: -fno-expand
|
||||
|
||||
.. option:: -fno-func-opt
|
||||
|
||||
.. option:: -fno-func-opt-split-cat
|
||||
|
||||
.. option:: -fno-gate
|
||||
|
||||
.. option:: -fno-inline
|
||||
|
@ -95,6 +95,7 @@ set(HEADERS
|
||||
V3Force.h
|
||||
V3Fork.h
|
||||
V3FunctionTraits.h
|
||||
V3FuncOpt.h
|
||||
V3Gate.h
|
||||
V3Global.h
|
||||
V3Graph.h
|
||||
@ -255,6 +256,7 @@ set(COMMON_SOURCES
|
||||
V3FileLine.cpp
|
||||
V3Force.cpp
|
||||
V3Fork.cpp
|
||||
V3FuncOpt.cpp
|
||||
V3Gate.cpp
|
||||
V3Global.cpp
|
||||
V3Graph.cpp
|
||||
|
@ -204,6 +204,7 @@ RAW_OBJS_PCH_ASTMT = \
|
||||
V3EmitCPch.o \
|
||||
V3EmitV.o \
|
||||
V3File.o \
|
||||
V3FuncOpt.o \
|
||||
V3Global.o \
|
||||
V3Hasher.o \
|
||||
V3Number.o \
|
||||
|
182
src/V3FuncOpt.cpp
Normal file
182
src/V3FuncOpt.cpp
Normal file
@ -0,0 +1,182 @@
|
||||
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
||||
//*************************************************************************
|
||||
// DESCRIPTION: Verilator: Generic optimizations on a per function basis
|
||||
//
|
||||
// Code available from: https://verilator.org
|
||||
//
|
||||
//*************************************************************************
|
||||
//
|
||||
// Copyright 2003-2024 by Wilson Snyder. This program is free software; you
|
||||
// can redistribute it and/or modify it under the terms of either the GNU
|
||||
// Lesser General Public License Version 3 or the Perl Artistic License
|
||||
// Version 2.0.
|
||||
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
//
|
||||
//*************************************************************************
|
||||
//
|
||||
// - Split assignments to wide locations with Concat on the RHS
|
||||
// at word boundaries:
|
||||
// foo = {l, r};
|
||||
// becomes (recursively):
|
||||
// foo[_:_] = r;
|
||||
// foo[_:_] = l;
|
||||
//
|
||||
//*************************************************************************
|
||||
|
||||
#include "V3PchAstMT.h"
|
||||
|
||||
#include "V3FuncOpt.h"
|
||||
|
||||
#include "V3Global.h"
|
||||
#include "V3Stats.h"
|
||||
#include "V3ThreadPool.h"
|
||||
|
||||
VL_DEFINE_DEBUG_FUNCTIONS;
|
||||
|
||||
class FuncOptVisitor final : public VNVisitor {
|
||||
// NODE STATE
|
||||
// AstNodeAssign::user() -> bool. Already checked, safe to split. Omit expensive check.
|
||||
|
||||
// STATE - Statistic tracking
|
||||
VDouble0 m_concatSplits; // Number of splits in assignments with Concat on RHS
|
||||
|
||||
// True for e.g.: foo = foo >> 1; or foo[foo[0]] = ...;
|
||||
static bool readsLhs(AstNodeAssign* nodep) {
|
||||
// It is expected that the number of vars written on the LHS is very small (should be 1).
|
||||
std::unordered_set<const AstVar*> lhsWrVarps;
|
||||
std::unordered_set<const AstVar*> lhsRdVarps;
|
||||
nodep->lhsp()->foreach([&](const AstVarRef* refp) {
|
||||
if (refp->access().isWriteOrRW()) lhsWrVarps.emplace(refp->varp());
|
||||
if (refp->access().isReadOrRW()) lhsRdVarps.emplace(refp->varp());
|
||||
});
|
||||
|
||||
// Common case of 1 variable on the LHS - special handling for speed
|
||||
if (lhsWrVarps.size() == 1) {
|
||||
const AstVar* const lhsWrVarp = *lhsWrVarps.begin();
|
||||
// Check Rhs doesn't read the written var
|
||||
const bool rhsReadsWritten = nodep->rhsp()->exists([=](const AstVarRef* refp) { //
|
||||
return refp->varp() == lhsWrVarp;
|
||||
});
|
||||
if (rhsReadsWritten) return true;
|
||||
// Check Lhs doesn't read the written var
|
||||
return lhsRdVarps.count(lhsWrVarp);
|
||||
}
|
||||
|
||||
// Generic case of multiple vars written on LHS
|
||||
// TODO: this might be impossible due to earlier transforms, not sure
|
||||
// Check Rhs doesn't read the written vars
|
||||
const bool rhsReadsWritten = nodep->rhsp()->exists([&](const AstVarRef* refp) { //
|
||||
return lhsWrVarps.count(refp->varp());
|
||||
});
|
||||
if (rhsReadsWritten) return true;
|
||||
// Check Lhs doesn't read the written vars
|
||||
for (const AstVar* const lhsWrVarp : lhsWrVarps) {
|
||||
if (lhsRdVarps.count(lhsWrVarp)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// METHODS
|
||||
// Split wide assignments with a wide concatenation on the RHS.
|
||||
// Returns true if 'nodep' was deleted
|
||||
bool splitConcat(AstNodeAssign* nodep) {
|
||||
UINFO(9, "splitConcat " << nodep << "\n");
|
||||
// Only care about concatenations on the right
|
||||
AstConcat* const rhsp = VN_CAST(nodep->rhsp(), Concat);
|
||||
if (!rhsp) return false;
|
||||
// Will need the LHS
|
||||
AstNodeExpr* lhsp = nodep->lhsp();
|
||||
UASSERT_OBJ(lhsp->width() == rhsp->width(), nodep, "Inconsistent assignment");
|
||||
// Only consider pure assignments. Nodes inserted below are safe.
|
||||
if (!nodep->user1() && (!lhsp->isPure() || !rhsp->isPure())) return false;
|
||||
// Check for a Sel on the LHS if present, and skip over it
|
||||
uint32_t lsb = 0;
|
||||
if (AstSel* const selp = VN_CAST(lhsp, Sel)) {
|
||||
if (AstConst* const lsbp = VN_CAST(selp->lsbp(), Const)) {
|
||||
lhsp = selp->fromp();
|
||||
lsb = lsbp->toUInt();
|
||||
} else {
|
||||
// Don't optimize if it's a variable select
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// No need to split assignments targeting storage smaller than a machine register
|
||||
if (lhsp->width() <= VL_QUADSIZE) return false;
|
||||
|
||||
// If it's a concat straddling a word boundary, try to split it.
|
||||
// The next visit on the new nodes will split it recursively.
|
||||
// Otherwise, keep the original assignment.
|
||||
const int lsbWord = lsb / VL_EDATASIZE;
|
||||
const int msbWord = (lsb + rhsp->width() - 1) / VL_EDATASIZE;
|
||||
if (lsbWord == msbWord) return false;
|
||||
|
||||
// If the RHS reads the LHS, we can't actually do this. Nodes inserted below are safe.
|
||||
if (!nodep->user1() && readsLhs(nodep)) return false;
|
||||
|
||||
// Ok, actually split it now
|
||||
UINFO(5, "splitConcat optimizing " << nodep << "\n");
|
||||
++m_concatSplits;
|
||||
// The 2 parts and their offsets
|
||||
AstNodeExpr* const rrp = rhsp->rhsp()->unlinkFrBack();
|
||||
AstNodeExpr* const rlp = rhsp->lhsp()->unlinkFrBack();
|
||||
const int rLsb = lsb;
|
||||
const int lLsb = lsb + rrp->width();
|
||||
// Insert the 2 assignment right after the original. They will be visited next.
|
||||
AstAssign* const arp = new AstAssign{
|
||||
nodep->fileline(),
|
||||
new AstSel{lhsp->fileline(), lhsp->cloneTreePure(false), rLsb, rrp->width()}, rrp};
|
||||
AstAssign* const alp = new AstAssign{
|
||||
nodep->fileline(),
|
||||
new AstSel{lhsp->fileline(), lhsp->unlinkFrBack(), lLsb, rlp->width()}, rlp};
|
||||
nodep->addNextHere(arp);
|
||||
arp->addNextHere(alp);
|
||||
// Safe to split these.
|
||||
arp->user1(true);
|
||||
alp->user1(true);
|
||||
// Nuke what is left
|
||||
VL_DO_DANGLING(pushDeletep(nodep->unlinkFrBack()), nodep);
|
||||
return true;
|
||||
}
|
||||
|
||||
// VISIT
|
||||
void visit(AstNodeAssign* nodep) override {
|
||||
// TODO: Only thing remaining inside functions should be AstAssign (that is, an actual
|
||||
// assignment statemant), but we stil use AstAssignW, AstAssignDly, and all, fix.
|
||||
if (v3Global.opt.fFuncSplitCat()) {
|
||||
if (splitConcat(nodep)) return; // Must return here, in case more code is added below
|
||||
}
|
||||
}
|
||||
|
||||
void visit(AstNodeExpr*) override {} // No need to descend further (Ignore AstExprStmt...)
|
||||
|
||||
void visit(AstNode* nodep) override { iterateChildren(nodep); }
|
||||
|
||||
// CONSTRUCTORS
|
||||
explicit FuncOptVisitor(AstCFunc* funcp) { iterateChildren(funcp); }
|
||||
~FuncOptVisitor() override {
|
||||
V3Stats::addStatSum("Optimizations, FuncOpt concat splits", m_concatSplits);
|
||||
}
|
||||
|
||||
public:
|
||||
static void apply(AstCFunc* funcp) { FuncOptVisitor{funcp}; }
|
||||
};
|
||||
|
||||
//######################################################################
|
||||
|
||||
// Apply FuncOptVisitor to every AstCFunc found directly under each module of the netlist,
// then dump/check the resulting tree.
void V3FuncOpt::funcOptAll(AstNetlist* nodep) {
    UINFO(2, __FUNCTION__ << ": " << endl);
    {
        // Declared before the thread scope, so user1 stays claimed for as long as
        // 'threadScope' exists
        const VNUser1InUse user1InUse;
        // NOTE(review): presumably leaving this scope waits for the enqueued jobs -
        // confirm in V3ThreadPool.h
        V3ThreadScope threadScope;
        AstNodeModule* modp = nodep->modulesp();
        while (modp) {
            // Fetch the successors up front, before handing anything to the workers
            AstNodeModule* const followingModp = VN_AS(modp->nextp(), NodeModule);
            AstNode* stmtp = modp->stmtsp();
            while (stmtp) {
                AstNode* const followingStmtp = stmtp->nextp();
                // One job per function
                AstCFunc* const cfuncp = VN_CAST(stmtp, CFunc);
                if (cfuncp) threadScope.enqueue([cfuncp]() { FuncOptVisitor::apply(cfuncp); });
                stmtp = followingStmtp;
            }
            modp = followingModp;
        }
    }
    V3Global::dumpCheckGlobalTree("funcopt", 0, dumpTreeEitherLevel() >= 3);
}
|
32
src/V3FuncOpt.h
Normal file
32
src/V3FuncOpt.h
Normal file
@ -0,0 +1,32 @@
|
||||
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
||||
//*************************************************************************
|
||||
// DESCRIPTION: Verilator: Generic optimizations on a per function basis
|
||||
//
|
||||
// Code available from: https://verilator.org
|
||||
//
|
||||
//*************************************************************************
|
||||
//
|
||||
// Copyright 2003-2024 by Wilson Snyder. This program is free software; you
|
||||
// can redistribute it and/or modify it under the terms of either the GNU
|
||||
// Lesser General Public License Version 3 or the Perl Artistic License
|
||||
// Version 2.0.
|
||||
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
//
|
||||
//*************************************************************************
|
||||
|
||||
#ifndef VERILATOR_V3FUNCOPT_H_
|
||||
#define VERILATOR_V3FUNCOPT_H_
|
||||
|
||||
#include "config_build.h"
|
||||
#include "verilatedos.h"
|
||||
|
||||
class AstNetlist;
|
||||
|
||||
//============================================================================
|
||||
|
||||
// Public entry point of the generic per-function optimization pass
class V3FuncOpt final {
|
||||
public:
|
||||
// Run the per-function optimizations on the functions of the given netlist
static void funcOptAll(AstNetlist* nodep);
|
||||
};
|
||||
|
||||
#endif // Guard
|
@ -1303,6 +1303,10 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc,
|
||||
DECL_OPTION("-fdead-assigns", FOnOff, &m_fDeadAssigns);
|
||||
DECL_OPTION("-fdead-cells", FOnOff, &m_fDeadCells);
|
||||
DECL_OPTION("-fexpand", FOnOff, &m_fExpand);
|
||||
DECL_OPTION("-ffunc-opt", CbFOnOff, [this](bool flag) { //
|
||||
m_fFuncSplitCat = flag;
|
||||
});
|
||||
DECL_OPTION("-ffunc-opt-split-cat", FOnOff, &m_fFuncSplitCat);
|
||||
DECL_OPTION("-fgate", FOnOff, &m_fGate);
|
||||
DECL_OPTION("-finline", FOnOff, &m_fInline);
|
||||
DECL_OPTION("-flife", FOnOff, &m_fLife);
|
||||
|
@ -384,6 +384,7 @@ private:
|
||||
bool m_fDeadAssigns; // main switch: -fno-dead-assigns: remove dead assigns
|
||||
bool m_fDeadCells; // main switch: -fno-dead-cells: remove dead cells
|
||||
bool m_fExpand; // main switch: -fno-expand: expansion of C macros
|
||||
bool m_fFuncSplitCat = true; // main switch: -fno-func-opt-split-cat: split wide assignments with Concat on RHS
|
||||
bool m_fGate; // main switch: -fno-gate: gate wire elimination
|
||||
bool m_fInline; // main switch: -fno-inline: module inlining
|
||||
bool m_fLife; // main switch: -fno-life: variable lifetime
|
||||
@ -674,6 +675,8 @@ public:
|
||||
bool fDeadAssigns() const { return m_fDeadAssigns; }
|
||||
bool fDeadCells() const { return m_fDeadCells; }
|
||||
bool fExpand() const { return m_fExpand; }
|
||||
// True if splitting of assignments with a Concat on the RHS is enabled (-ffunc-opt-split-cat)
bool fFuncSplitCat() const { return m_fFuncSplitCat; }
|
||||
// True if any per-function optimization is enabled; at present this is only the Concat split
bool fFunc() const { return fFuncSplitCat(); }
|
||||
bool fGate() const { return m_fGate; }
|
||||
bool fInline() const { return m_fInline; }
|
||||
bool fLife() const { return m_fLife; }
|
||||
|
@ -53,6 +53,7 @@
|
||||
#include "V3File.h"
|
||||
#include "V3Force.h"
|
||||
#include "V3Fork.h"
|
||||
#include "V3FuncOpt.h"
|
||||
#include "V3Gate.h"
|
||||
#include "V3Global.h"
|
||||
#include "V3Graph.h"
|
||||
@ -497,6 +498,9 @@ static void process() {
|
||||
// --GENERATION------------------
|
||||
|
||||
if (!v3Global.opt.serializeOnly()) {
|
||||
// Generic optimizations on a per-function basis
|
||||
if (v3Global.opt.fFunc()) V3FuncOpt::funcOptAll(v3Global.rootp());
|
||||
|
||||
// Remove unused vars
|
||||
V3Const::constifyAll(v3Global.rootp());
|
||||
V3Dead::deadifyAll(v3Global.rootp());
|
||||
|
@ -17,5 +17,8 @@ test.file_grep(test.stats,
|
||||
r' Optimizations, DFG pre inline BalanceTrees, concat trees balanced\s+(\d+)', 0)
|
||||
test.file_grep(test.stats,
|
||||
r' Optimizations, DFG post inline BalanceTrees, concat trees balanced\s+(\d+)', 1)
|
||||
test.file_grep(test.stats, r'Optimizations, DFG pre inline Dfg2Ast, result equations\s+(\d+)', 1)
|
||||
test.file_grep(test.stats, r'Optimizations, DFG post inline Dfg2Ast, result equations\s+(\d+)', 1)
|
||||
test.file_grep(test.stats, r'Optimizations, FuncOpt concat splits\s+(\d+)', 62)
|
||||
|
||||
test.passes()
|
||||
|
26
test_regress/t/t_dfg_balance_cats_nofunc.py
Executable file
26
test_regress/t/t_dfg_balance_cats_nofunc.py
Executable file
@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# Copyright 2024 by Wilson Snyder. This program is free software; you
|
||||
# can redistribute it and/or modify it under the terms of either the GNU
|
||||
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||
# Version 2.0.
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt')

# Reuse the design of the t_dfg_balance_cats test
test.top_filename = "t/t_dfg_balance_cats.v"

# Compile with the per-function optimizations turned off
test.compile(verilator_flags2=["--stats", "-fno-func-opt"])

# Statistics that must be reported, each paired with its expected count
expected_stats = [
    (r' Optimizations, DFG pre inline BalanceTrees, concat trees balanced\s+(\d+)', 0),
    (r' Optimizations, DFG post inline BalanceTrees, concat trees balanced\s+(\d+)', 1),
    (r'Optimizations, DFG pre inline Dfg2Ast, result equations\s+(\d+)', 1),
    (r'Optimizations, DFG post inline Dfg2Ast, result equations\s+(\d+)', 1),
]
for stat_regex, stat_count in expected_stats:
    test.file_grep(test.stats, stat_regex, stat_count)

# With -fno-func-opt the FuncOpt statistic must not be reported at all
test.file_grep_not(test.stats, r'Optimizations, FuncOpt concat splits')

test.passes()
|
Loading…
Reference in New Issue
Block a user