DFG: Extract cyclic components separately

A lot of optimizations in DFG assume a DAG, but the more things are
representable, the more likely it is that a small cyclic sub-graph is
present in an otherwise very large graph that is mostly acyclic. In
order to avoid loosing optimization opportunities, we explicitly extract
the cyclic sub-graphs (which are the strongly connected components +
anything feeing them, up to variable boundaries) and treat them
separately. This enables optimization of the remaining input.
This commit is contained in:
Geza Lore 2022-09-28 14:42:18 +01:00
parent acebafcbc2
commit c9d6344f2f
4 changed files with 469 additions and 30 deletions

View File

@ -22,8 +22,11 @@
#include "V3File.h"
#include <cctype>
#include <type_traits>
#include <unordered_map>
VL_DEFINE_DEBUG_FUNCTIONS;
//------------------------------------------------------------------------------
// DfgGraph
//------------------------------------------------------------------------------
@ -114,7 +117,7 @@ bool DfgGraph::sortTopologically(bool reverse) {
return true;
}
std::vector<std::unique_ptr<DfgGraph>> DfgGraph::splitIntoComponents() {
std::vector<std::unique_ptr<DfgGraph>> DfgGraph::splitIntoComponents(std::string label) {
size_t componentNumber = 0;
std::unordered_map<const DfgVertex*, unsigned> vertex2component;
@ -149,8 +152,10 @@ std::vector<std::unique_ptr<DfgGraph>> DfgGraph::splitIntoComponents() {
// Create the component graphs
std::vector<std::unique_ptr<DfgGraph>> results{componentNumber};
const std::string prefix{name() + (label.empty() ? "" : "-") + label + "-component-"};
for (size_t i = 0; i < componentNumber; ++i) {
results[i].reset(new DfgGraph{*m_modulep, name() + "-component-" + cvtToStr(i)});
results[i].reset(new DfgGraph{*m_modulep, prefix + cvtToStr(i)});
}
// Move all vertices under the corresponding component graphs
@ -164,6 +169,351 @@ std::vector<std::unique_ptr<DfgGraph>> DfgGraph::splitIntoComponents() {
return results;
}
class ExtractCyclicComponents final {
static constexpr size_t UNASSIGNED = std::numeric_limits<size_t>::max();
// TYPES
struct VertexState {
size_t index; // Used by Pearce's algorithm for detecting SCCs
size_t component = UNASSIGNED; // Result component number (0 stays in input graph)
VertexState(size_t index)
: index{index} {}
};
// STATE
//==========================================================================
// Shared state
DfgGraph& m_dfg; // The input graph
const std::string m_prefix; // Component name prefix
std::unordered_map<const DfgVertex*, VertexState> m_state; // Vertex state
size_t m_nonTrivialSCCs = 0; // Number of non-trivial SCCs in the graph
const bool m_doExpensiveChecks = v3Global.opt.debugCheck();
//==========================================================================
// State for Pearce's algorithm for detecting SCCs
size_t m_index = 0; // Visitation index counter
std::vector<DfgVertex*> m_stack; // The stack used by the algorithm
//==========================================================================
// State for merging
std::unordered_set<const DfgVertex*> m_merged; // Marks visited vertices
//==========================================================================
// State for extraction
// The extracted cyclic components
std::vector<std::unique_ptr<DfgGraph>> m_components;
// Map from 'variable vertex' -> 'component index' -> 'clone in that component'
std::unordered_map<const DfgVertexLValue*, std::unordered_map<size_t, DfgVertexLValue*>>
m_clones;
// METHODS
//==========================================================================
// Methods for Pearce's algorithm to detect strongly connected components
void visitColorSCCs(DfgVertex& vtx) {
const auto pair = m_state.emplace(std::piecewise_construct, //
std::forward_as_tuple(&vtx), //
std::forward_as_tuple(m_index));
// If already visited, then nothing to do
if (!pair.second) return;
// Visiting node
const size_t rootIndex = m_index++;
vtx.forEachSink([&](DfgVertex& child) {
// Visit child
visitColorSCCs(child);
auto& childSatate = m_state.at(&child);
// If the child is not in an SCC
if (childSatate.component == UNASSIGNED) {
auto& vtxState = m_state.at(&vtx);
if (vtxState.index > childSatate.index) vtxState.index = childSatate.index;
}
});
auto& vtxState = m_state.at(&vtx);
if (vtxState.index == rootIndex) {
// This is the 'root' of an SCC
// A trivial SCC contains only a single vertex
const bool isTrivial = m_stack.empty() || m_state.at(m_stack.back()).index < rootIndex;
// We also need a separate component for vertices that drive themselves (which can
// happen for input like 'assign a = a'), as we want to extract them (they are cyclic).
const bool drivesSelf = vtx.findSink<DfgVertex>([&vtx](const DfgVertex& sink) { //
return &vtx == &sink;
});
if (!isTrivial || drivesSelf) {
// Allocate new component
++m_nonTrivialSCCs;
vtxState.component = m_nonTrivialSCCs;
while (!m_stack.empty()) {
DfgVertex* const topp = m_stack.back();
auto& topState = m_state.at(topp);
// Only higher nodes belong to the same SCC
if (topState.index < rootIndex) break;
m_stack.pop_back();
topState.component = m_nonTrivialSCCs;
}
} else {
// Trivial SCC (and does not drive itself), so acyclic. Keep it in original graph.
vtxState.component = 0;
}
} else {
// Not the root of an SCC
m_stack.push_back(&vtx);
}
}
void colorSCCs() {
// Implements Pearce's algorithm to color the strongly connected components. For reference
// see "An Improved Algorithm for Finding the Strongly Connected Components of a Directed
// Graph", David J.Pearce, 2005
m_state.reserve(m_dfg.size());
m_dfg.forEachVertex([&](DfgVertex& vtx) { visitColorSCCs(vtx); });
}
//==========================================================================
// Methods for merging
void visitMergeSCCs(const DfgVertex& vtx, size_t targetComponent) {
// We stop at variable boundaries, which is where we will split the graphs
if (vtx.is<DfgVarPacked>() || vtx.is<DfgVarArray>()) return;
// Mark visited/move on if already visited
if (!m_merged.insert(&vtx).second) return;
// Assign vertex to the target component
m_state.at(&vtx).component = targetComponent;
// Visit all neighbours
vtx.forEachSource([=](const DfgVertex& other) { visitMergeSCCs(other, targetComponent); });
vtx.forEachSink([=](const DfgVertex& other) { visitMergeSCCs(other, targetComponent); });
}
void mergeSCCs() {
// Ensure that component boundaries are always at variables, by merging SCCs
m_merged.reserve(m_dfg.size());
m_dfg.forEachVertex([this](DfgVertex& vtx) {
// Start DFS from each vertex that is in a non-trivial SCC, and merge everything that
// is reachable from it into this component.
if (const size_t target = m_state.at(&vtx).component) visitMergeSCCs(vtx, target);
});
}
//==========================================================================
// Methods for extraction
// Retrieve clone of vertex in the given component
DfgVertexLValue& getClone(DfgVertexLValue& vtx, size_t component) {
UASSERT_OBJ(m_state.at(&vtx).component != component, &vtx, "Vertex is in that component");
DfgVertexLValue*& clonep = m_clones[&vtx][component];
if (!clonep) {
DfgGraph& dfg = component == 0 ? m_dfg : *m_components[component - 1];
if (DfgVarPacked* const pVtxp = vtx.cast<DfgVarPacked>()) {
clonep = new DfgVarPacked{dfg, pVtxp->varp()};
} else if (DfgVarArray* const aVtxp = vtx.cast<DfgVarArray>()) {
clonep = new DfgVarArray{dfg, aVtxp->varp()};
}
UASSERT_OBJ(clonep, &vtx, "Unhandled 'DfgVertexLValue' sub-type");
if (VL_UNLIKELY(m_doExpensiveChecks)) {
// Assign component number of clone for later checks
m_state
.emplace(std::piecewise_construct, std::forward_as_tuple(clonep),
std::forward_as_tuple(0))
.first->second.component
= component;
}
// We need to mark both the original and the clone as having additional references
vtx.setHasModRefs();
clonep->setHasModRefs();
}
return *clonep;
}
// Fix up non-variable sources of a DfgVertexLValue that are in a different component,
// using the provided 'relink' callback
template <typename T_Vertex>
void fixSources(T_Vertex& vtx, std::function<void(T_Vertex&, DfgVertex&, size_t)> relink) {
static_assert(std::is_base_of<DfgVertexLValue, T_Vertex>::value,
"'Vertex' must be a 'DfgVertexLValue'");
const size_t component = m_state.at(&vtx).component;
vtx.forEachSourceEdge([&](DfgEdge& edge, size_t idx) {
DfgVertex& source = *edge.sourcep();
// DfgVertexLValue sources are fixed up by `fixSinks` on those sources
if (source.is<DfgVarPacked>() || source.is<DfgVarArray>()) return;
const size_t sourceComponent = m_state.at(&source).component;
// Same component is OK
if (sourceComponent == component) return;
// Unlink the source edge (source is reconnected by 'relink'
edge.unlinkSource();
// Apply the fixup
DfgVertexLValue& clone = getClone(vtx, sourceComponent);
relink(*(clone.as<T_Vertex>()), source, idx);
});
}
// Fix up sinks of given variable vertex that are in a different component
void fixSinks(DfgVertexLValue& vtx) {
const size_t component = m_state.at(&vtx).component;
vtx.forEachSinkEdge([&](DfgEdge& edge) {
const size_t sinkComponent = m_state.at(edge.sinkp()).component;
// Same component is OK
if (sinkComponent == component) return;
// Relink the sink to read the clone
edge.relinkSource(&getClone(vtx, sinkComponent));
});
}
// Fix edges that cross components
void fixEdges(DfgVertex& vtx) {
if (DfgVarPacked* const vvtxp = vtx.cast<DfgVarPacked>()) {
fixSources<DfgVarPacked>(
*vvtxp, [&](DfgVarPacked& clone, DfgVertex& driver, size_t driverIdx) {
clone.addDriver(vvtxp->driverFileLine(driverIdx), //
vvtxp->driverLsb(driverIdx), &driver);
});
fixSinks(*vvtxp);
return;
}
if (DfgVarArray* const vvtxp = vtx.cast<DfgVarArray>()) {
fixSources<DfgVarArray>( //
*vvtxp, [&](DfgVarArray& clone, DfgVertex& driver, size_t driverIdx) {
clone.addDriver(vvtxp->driverFileLine(driverIdx), //
vvtxp->driverIndex(driverIdx), &driver);
});
fixSinks(*vvtxp);
return;
}
if (VL_UNLIKELY(m_doExpensiveChecks)) {
// Non-variable vertex. Just check that edges do not cross components
const size_t component = m_state.at(&vtx).component;
vtx.forEachSourceEdge([&](DfgEdge& edge, size_t) {
DfgVertex& source = *edge.sourcep();
// OK to cross at variables
if (source.is<DfgVarPacked>() || source.is<DfgVarArray>()) return;
UASSERT_OBJ(component == m_state.at(&source).component, &vtx,
"Component crossing edge without variable involvement");
});
}
}
static void packSources(DfgGraph& dfg) {
// Remove undriven variable sources
dfg.forEachVertex([&](DfgVertex& vtx) {
if (DfgVarPacked* const vtxp = vtx.cast<DfgVarPacked>()) {
vtxp->packSources();
return;
}
if (DfgVarArray* const vtxp = vtx.cast<DfgVarArray>()) {
vtxp->packSources();
return;
}
});
}
static void checkEdges(DfgGraph& dfg) {
// Check that each edge connects to a vertex that is within the same graph.
// Also check variable vertex sources are all connected.
std::unordered_set<const DfgVertex*> vertices{dfg.size()};
dfg.forEachVertex([&](const DfgVertex& vtx) { vertices.insert(&vtx); });
dfg.forEachVertex([&](const DfgVertex& vtx) {
vtx.forEachSource([&](const DfgVertex& src) {
UASSERT_OBJ(vertices.count(&src), &vtx, "Source vertex not in graph");
});
vtx.forEachSink([&](const DfgVertex& snk) {
UASSERT_OBJ(vertices.count(&snk), &snk, "Sink vertex not in graph");
});
if (const DfgVarPacked* const vtxp = vtx.cast<DfgVarPacked>()) {
vtxp->forEachSourceEdge([](const DfgEdge& edge, size_t) {
UASSERT_OBJ(edge.sourcep(), edge.sinkp(), "Missing source on variable vertex");
});
return;
}
if (const DfgVarArray* const vtxp = vtx.cast<DfgVarArray>()) {
vtxp->forEachSourceEdge([](const DfgEdge& edge, size_t) {
UASSERT_OBJ(edge.sourcep(), edge.sinkp(), "Missing source on variable vertex");
});
return;
}
});
}
void extractComponents() {
// If the graph was acyclic (which should be the common case), there will be no non-trivial
// SCCs, so we are done.
if (!m_nonTrivialSCCs) return;
// Allocate result graphs
m_components.resize(m_nonTrivialSCCs);
for (size_t i = 0; i < m_nonTrivialSCCs; ++i) {
m_components[i].reset(new DfgGraph{*m_dfg.modulep(), m_prefix + cvtToStr(i)});
}
// Fix up edges crossing components, and move vertices into their correct component. Note
// that fixing up the edges can create clones. Clones are added to the correct component,
// which also means that they might be added to the original DFG. Clones do not need
// fixing up, but also are not necessarily in the m_state map (in fact they are only there
// in debug mode), so we only iterate up to the original vertices. Because any new vertex
// is added at the end of the vertex list, we can just do this by iterating a fixed number
// of vertices.
size_t vertexCount = m_dfg.size();
m_dfg.forEachVertex([&](DfgVertex& vtx) {
if (!vertexCount) return;
--vertexCount;
// Fix up the edges crossing components
fixEdges(vtx);
// Move the vertex to the component graph (leave component 0, which is the originally
// acyclic sub-graph, in the original graph)
if (const size_t component = m_state.at(&vtx).component) {
m_dfg.removeVertex(vtx);
m_components[component - 1]->addVertex(vtx);
}
});
// Pack sources of variables to remove the now undriven inputs
// (cloning might have unlinked some of the inputs),
packSources(m_dfg);
for (const auto& dfgp : m_components) packSources(*dfgp);
if (VL_UNLIKELY(m_doExpensiveChecks)) {
// Check results for consistency
checkEdges(m_dfg);
for (const auto& dfgp : m_components) checkEdges(*dfgp);
}
}
// CONSTRUCTOR - entry point
explicit ExtractCyclicComponents(DfgGraph& dfg, std::string label)
: m_dfg{dfg}
, m_prefix{dfg.name() + (label.empty() ? "" : "-") + label + "-component-"} {
// Find all the non-trivial SCCs (and trivial cycles) in the graph
colorSCCs();
// Ensure that component boundaries are always at variables, by merging SCCs
mergeSCCs();
// Extract the components
extractComponents();
}
public:
static std::vector<std::unique_ptr<DfgGraph>> apply(DfgGraph& dfg, const std::string& label) {
return std::move(ExtractCyclicComponents{dfg, label}.m_components);
}
};
std::vector<std::unique_ptr<DfgGraph>> DfgGraph::extractCyclicComponents(std::string label) {
return ExtractCyclicComponents::apply(*this, label);
}
void DfgGraph::runToFixedPoint(std::function<bool(DfgVertex&)> f) {
bool changed;
const auto apply = [&](DfgVertex& vtx) -> void {
@ -278,7 +628,9 @@ static void dumpDotVertexAndSourceEdges(std::ostream& os, const DfgVertex& vtx)
vtx.forEachSourceEdge([&](const DfgEdge& edge, size_t idx) { //
if (edge.sourcep()) {
string headLabel;
if (vtx.arity() > 1) headLabel = vtx.srcName(idx);
if (vtx.arity() > 1 || vtx.is<DfgVarPacked>() || vtx.is<DfgVarArray>()) {
headLabel = vtx.srcName(idx);
}
dumpDotEdge(os, edge, headLabel);
}
});

View File

@ -83,19 +83,15 @@ public:
VL_UNCOPYABLE(DfgGraph);
// METHODS
private:
public:
// Add DfgVertex to this graph (assumes not yet contained).
inline void addVertex(DfgVertex& vtx);
// Remove DfgVertex form this graph (assumes it is contained).
inline void removeVertex(DfgVertex& vtx);
public:
// Number of vertices in this graph
size_t size() const { return m_size; }
// Parent module
AstModule* modulep() const { return m_modulep; }
// Name of this graph
const string& name() const { return m_name; }
@ -126,10 +122,20 @@ public:
// Split this graph into individual components (unique sub-graphs with no edges between them).
// Leaves 'this' graph empty.
std::vector<std::unique_ptr<DfgGraph>> splitIntoComponents();
std::vector<std::unique_ptr<DfgGraph>> splitIntoComponents(std::string label);
// Apply the given function to all vertices in the graph. The function return value indicates
// that a change has been made to the graph. Repeat until no changes reported.
// Extract cyclic sub-graphs from 'this' graph. Cyclic sub-graphs are those that contain at
// least one strongly connected component (SCC) plus any other vertices that feed or sink from
// the SCCs, up to a variable boundary. This means that the returned graphs are guaranteed to
// be cyclic, but they are not guaranteed to be strongly connected (however, they are always
// at least weakly connected). Trivial SCCs that are acyclic (i.e.: vertices that are not part
// of a cycle) are left in 'this' graph. This means that at the end 'this' graph is guaranteed
// to be a DAG (acyclic). 'this' will not necessarily be a connected graph at the end, even if
// it was originally connected.
std::vector<std::unique_ptr<DfgGraph>> extractCyclicComponents(std::string label);
// Apply the given function to all vertices in the graph. The function return value
// indicates that a change has been made to the graph. Repeat until no changes reported.
void runToFixedPoint(std::function<bool(DfgVertex&)> f);
// Dump graph in Graphviz format into the given stream 'os'. 'label' is added to the name of
@ -653,6 +659,26 @@ public:
DfgVertexVariadic::resetSources();
}
// Remove undriven sources
void packSources() {
// Grab and reset the driver data
std::vector<DriverData> driverData{std::move(m_driverData)};
// Grab and unlink the sources
std::vector<DfgVertex*> sources{arity()};
forEachSourceEdge([&](DfgEdge& edge, size_t idx) {
sources[idx] = edge.sourcep();
edge.unlinkSource();
});
DfgVertexVariadic::resetSources();
// Add back the driven sources
for (size_t i = 0; i < sources.size(); ++i) {
if (!sources[i]) continue;
addDriver(driverData[i].first, driverData[i].second, sources[i]);
}
}
FileLine* driverFileLine(size_t idx) const { return m_driverData[idx].first; }
uint32_t driverLsb(size_t idx) const { return m_driverData[idx].second; }
@ -692,6 +718,26 @@ public:
DfgVertexVariadic::resetSources();
}
// Remove undriven sources
void packSources() {
// Grab and reset the driver data
std::vector<DriverData> driverData{std::move(m_driverData)};
// Grab and unlink the sources
std::vector<DfgVertex*> sources{arity()};
forEachSourceEdge([&](DfgEdge& edge, size_t idx) {
sources[idx] = edge.sourcep();
edge.unlinkSource();
});
DfgVertexVariadic::resetSources();
// Add back the driven sources
for (size_t i = 0; i < sources.size(); ++i) {
if (!sources[i]) continue;
addDriver(driverData[i].first, driverData[i].second, sources[i]);
}
}
FileLine* driverFileLine(size_t idx) const { return m_driverData[idx].first; }
uint32_t driverIndex(size_t idx) const { return m_driverData[idx].second; }

View File

@ -264,27 +264,43 @@ void V3DfgOptimizer::optimize(AstNetlist* netlistp, const string& label) {
// Build the DFG of this module
const std::unique_ptr<DfgGraph> dfg{V3DfgPasses::astToDfg(*modp, ctx)};
if (dumpDfg() >= 9) dfg->dumpDotFilePrefixed(ctx.prefix() + "whole-input");
if (dumpDfg() >= 8) dfg->dumpDotFilePrefixed(ctx.prefix() + "whole-input");
// Split the DFG into independent components
const std::vector<std::unique_ptr<DfgGraph>>& components = dfg->splitIntoComponents();
// Extract the cyclic sub-graphs. We do this because a lot of the optimizations assume a
// DAG, and large, mostly acyclic graphs could not be optimized due to the presence of
// small cycles.
const std::vector<std::unique_ptr<DfgGraph>>& cyclicComponents
= dfg->extractCyclicComponents("cyclic");
// For each component
for (auto& component : components) {
// Split the remaining acyclic DFG into [weakly] connected components
const std::vector<std::unique_ptr<DfgGraph>>& acyclicComponents
= dfg->splitIntoComponents("acyclic");
// Quick sanity check
UASSERT_OBJ(dfg->size() == 0, nodep, "DfgGraph should have become empty");
// For each cyclic component
for (auto& component : cyclicComponents) {
if (dumpDfg() >= 7) component->dumpDotFilePrefixed(ctx.prefix() + "source");
// TODO: Apply optimizations safe for cyclic graphs
// Add back under the main DFG (we will convert everything back in one go)
dfg->addGraph(*component);
}
// For each acyclic component
for (auto& component : acyclicComponents) {
if (dumpDfg() >= 7) component->dumpDotFilePrefixed(ctx.prefix() + "source");
// Reverse topologically sort the component
const bool acyclic = component->sortTopologically(/* reverse: */ true);
// Optimize the component (iff it is not cyclic)
if (VL_LIKELY(acyclic)) {
V3DfgPasses::optimize(*component, ctx);
} else if (dumpDfg() >= 7) {
component->dumpDotFilePrefixed(ctx.prefix() + "cyclic");
}
// Add back under the main DFG (we will convert back in one go)
UASSERT_OBJ(acyclic, nodep, "Supposedly acyclic graph is cyclic");
// Optimize the component
V3DfgPasses::optimize(*component, ctx);
// Add back under the main DFG (we will convert everything back in one go)
dfg->addGraph(*component);
}
// Convert back to Ast
if (dumpDfg() >= 9) dfg->dumpDotFilePrefixed(ctx.prefix() + "whole-optimized");
if (dumpDfg() >= 8) dfg->dumpDotFilePrefixed(ctx.prefix() + "whole-optimized");
AstModule* const resultModp = V3DfgPasses::dfgToAst(*dfg, ctx);
UASSERT_OBJ(resultModp == modp, modp, "Should be the same module");
}

View File

@ -4,16 +4,41 @@
// any use, without warranty, 2022 by Geza Lore.
// SPDX-License-Identifier: CC0-1.0
module t (/*AUTOARG*/
// Inputs
clk
);
input clk;
// verilator lint_off UNOPTFLAT
module t (
input wire i,
output wire o
);
wire a;
wire b;
wire c;
wire d;
assign a = b + 1'b1;
assign c = i + 1'b1;
assign d = c + 1'b1;
assign a = b + d;
assign b = a + 1'b1;
wire p;
wire q;
wire r;
wire s;
assign p = i + 1'b1;
assign q = p + 1'b1;
assign r = s ^ q;
assign s = r + 1'b1;
wire x;
wire y;
wire z;
wire w;
assign x = y ^ i;
assign y = x;
assign z = w;
assign w = y & z;
assign o = b | x;
endmodule