Speed up TSP sort implementation

- More efficient comparison by pre-computing sorting keys.
- Discard work items in the algorithms as soon as they are known to be
  redundant. This greatly reduces data structure sizes.
- Use V3GraphVertex->user() for state tracking instead of unordered_map.
  While both of these are constant time, the costs do add up.
- In `makeMinSpanningTree`, instead of batch inserting outgoing edges of
  each visited vertex into an ordered set, keep an ordered set of sorted
  vectors of edges. This reduces the size of the ordered set
  significantly (it is now O(V) rather than O(E), and as the subject
  graph is a complete graph, V ~ sqrt(E), so this is a significant gain).
- Use a vector + sorting in `perfectMatching` instead of an ordered set.
  This is faster on large working sets.

This yields a 3.8x speedup on the variable order pass and an overall 14%
verilation speed gain on a large design.
This commit is contained in:
Geza Lore 2021-12-20 15:16:48 +00:00
parent 9a8c878f2d
commit 2ba9eb4228
2 changed files with 149 additions and 109 deletions

View File

@ -282,7 +282,7 @@ protected:
bool m_cutable; // Interconnect may be broken in order sorting bool m_cutable; // Interconnect may be broken in order sorting
union { union {
void* m_userp; // Marker for some algorithms void* m_userp; // Marker for some algorithms
uint32_t m_user; // Marker for some algorithms uint64_t m_user; // Marker for some algorithms
}; };
// METHODS // METHODS
void init(V3Graph* graphp, V3GraphVertex* fromp, V3GraphVertex* top, int weight, void init(V3Graph* graphp, V3GraphVertex* fromp, V3GraphVertex* top, int weight,
@ -326,8 +326,8 @@ public:
void cutable(bool cutable) { m_cutable = cutable; } void cutable(bool cutable) { m_cutable = cutable; }
void userp(void* user) { m_userp = user; } void userp(void* user) { m_userp = user; }
void* userp() const { return m_userp; } void* userp() const { return m_userp; }
void user(uint32_t user) { m_user = user; } void user(uint64_t user) { m_user = user; }
uint32_t user() const { return m_user; } uint64_t user() const { return m_user; }
V3GraphVertex* fromp() const { return m_fromp; } V3GraphVertex* fromp() const { return m_fromp; }
V3GraphVertex* top() const { return m_top; } V3GraphVertex* top() const { return m_top; }
V3GraphVertex* closerp(GraphWay way) const { return way.forward() ? fromp() : top(); } V3GraphVertex* closerp(GraphWay way) const { return way.forward() ? fromp() : top(); }

View File

@ -28,21 +28,21 @@
#include "V3Graph.h" #include "V3Graph.h"
#include "V3TSP.h" #include "V3TSP.h"
#include <algorithm>
#include <cmath> #include <cmath>
#include <list> #include <list>
#include <memory> #include <memory>
#include <set>
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <vector>
#include <unordered_set> #include <unordered_set>
#include <unordered_map> #include <unordered_map>
#include <vector>
//###################################################################### //######################################################################
// Support classes // Support classes
namespace V3TSP { namespace V3TSP {
static unsigned edgeIdNext = 0; static uint32_t edgeIdNext = 0;
static void selfTestStates(); static void selfTestStates();
static void selfTestString(); static void selfTestString();
@ -73,6 +73,8 @@ public:
// TYPES // TYPES
using Vertex = TspVertexTmpl<T_Key>; using Vertex = TspVertexTmpl<T_Key>;
enum VertexState : uint32_t { CLEAR = 0, MST_VISITED = 1, UNMATCHED_ODD = 2 };
// MEMBERS // MEMBERS
std::unordered_map<T_Key, Vertex*> m_vertices; // T_Key to Vertex lookup map std::unordered_map<T_Key, Vertex*> m_vertices; // T_Key to Vertex lookup map
@ -94,7 +96,10 @@ public:
// a matched pairs of opposite-directional edges to represent // a matched pairs of opposite-directional edges to represent
// each non-directional edge: // each non-directional edge:
void addEdge(const T_Key& from, const T_Key& to, int cost) { void addEdge(const T_Key& from, const T_Key& to, int cost) {
#if VL_DEBUG // Hot, so only in debug
UASSERT(from != to, "Adding edge would form a loop"); UASSERT(from != to, "Adding edge would form a loop");
UASSERT(cost >= 0, "Negative weight edge");
#endif
Vertex* const fp = findVertex(from); Vertex* const fp = findVertex(from);
Vertex* const tp = findVertex(to); Vertex* const tp = findVertex(to);
@ -102,12 +107,20 @@ public:
// The only time we may create duplicate edges is when // The only time we may create duplicate edges is when
// combining the MST with the perfect-matched pairs, // combining the MST with the perfect-matched pairs,
// and in that case, we want to permit duplicate edges. // and in that case, we want to permit duplicate edges.
const unsigned edgeId = ++V3TSP::edgeIdNext; const uint32_t edgeId = ++V3TSP::edgeIdNext;
// Record the 'id' which identifies a single bidir edge // We want to be able to compare edges quickly for a total
// in the user field of each V3GraphEdge: // ordering, so pre-compute a sorting key and store it in
(new V3GraphEdge(this, fp, tp, cost))->user(edgeId); // the edge user field. We also want easy access to the 'id'
(new V3GraphEdge(this, tp, fp, cost))->user(edgeId); // which uniquely identifies a single bidir edge. Luckily we
// can do both efficiently.
const uint64_t userValue = (static_cast<uint64_t>(cost) << 32) | edgeId;
(new V3GraphEdge(this, fp, tp, cost))->user(userValue);
(new V3GraphEdge(this, tp, fp, cost))->user(userValue);
}
inline static uint32_t getEdgeId(const V3GraphEdge* edgep) {
return static_cast<uint32_t>(edgep->user());
} }
bool empty() const { return m_vertices.empty(); } bool empty() const { return m_vertices.empty(); }
@ -118,100 +131,121 @@ public:
return vertices; return vertices;
} }
class EdgeCmp final { private:
// Provides a deterministic compare for outgoing V3GraphEdge's // We will keep sorted lists of edges as vectors
// to be used in Prim's algorithm below. Also used in the using EdgeList = std::vector<V3GraphEdge*>;
// perfectMatching() routine.
public:
// CONSTRUCTORS
EdgeCmp() = default;
// METHODS
bool operator()(const V3GraphEdge* ap, const V3GraphEdge* bp) {
const int aCost = ap->weight();
const int bCost = bp->weight();
// Sort first on cost, lowest cost edges first:
if (aCost < bCost) return true;
if (bCost < aCost) return false;
// Costs are equal. Compare edgeId's which should be unique.
return ap->user() < bp->user();
}
private: inline static bool edgeCmp(const V3GraphEdge* ap, const V3GraphEdge* bp) {
VL_UNCOPYABLE(EdgeCmp); // We pre-computed these when adding the edge to sort first by cost, then by identity
return ap->user() > bp->user();
}
struct EdgeListCmp final {
bool operator()(const EdgeList* ap, const EdgeList* bp) {
// Simply compare heads
return edgeCmp(bp->back(), ap->back());
}
}; };
static Vertex* castVertexp(V3GraphVertex* vxp) { return dynamic_cast<Vertex*>(vxp); } inline static Vertex* castVertexp(V3GraphVertex* vxp) { return static_cast<Vertex*>(vxp); }
public:
// From *this, populate *mstp with the minimum spanning tree. // From *this, populate *mstp with the minimum spanning tree.
// *mstp must be initially empty. // *mstp must be initially empty.
void makeMinSpanningTree(TspGraphTmpl* mstp) { void makeMinSpanningTree(TspGraphTmpl* mstp) {
UASSERT(mstp->empty(), "Output graph must start empty"); UASSERT(mstp->empty(), "Output graph must start empty");
// Use Prim's algorithm to efficiently construct the MST. // Use Prim's algorithm to efficiently construct the MST.
std::unordered_set<Vertex*> visited_set;
EdgeCmp cmp; uint32_t vertCount = 0;
using PendingEdgeSet = std::set<V3GraphEdge*, EdgeCmp&>;
// This is the set of pending edges from visited to unvisited
// nodes.
PendingEdgeSet pendingEdges(cmp);
vluint32_t vertCount = 0;
for (V3GraphVertex* vxp = verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { for (V3GraphVertex* vxp = verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
mstp->addVertex(castVertexp(vxp)->key()); mstp->addVertex(castVertexp(vxp)->key());
vertCount++; vertCount++;
} }
// Choose an arbitrary start vertex and visit it; // Allocate storage for per vertex edge lists up front.
// all incident edges from this vertex go into a pending edge set. std::vector<EdgeList> allocatedEdgeLists{vertCount};
Vertex* const start_vertexp = castVertexp(verticesBeginp());
visited_set.insert(start_vertexp); // Index of vertex in visitation order (used for indexing allocatedEdgeLists)
for (V3GraphEdge* edgep = start_vertexp->outBeginp(); edgep; edgep = edgep->outNextp()) { uint32_t vertIdx = 0;
pendingEdges.insert(edgep);
} // We keep pending edges as a sorted set of sorted vectors. This allows us to find the
// lowest cost edge quickly, while also reducing the cost of inserting batches of new
// edges, which is what we need in this algorithm.
std::set<EdgeList*, EdgeListCmp> pendingEdgeListps;
const auto visit = [&](V3GraphVertex* vtxp) {
#ifdef VL_DEBUG // Very hot, so only in debug
UASSERT(vtxp->user() == VertexState::CLEAR, "Vertex visited twice");
#endif
// Mark vertex as visited
vtxp->user(VertexState::MST_VISITED);
// Allocate new edge list
EdgeList* const newEdgesp = &allocatedEdgeLists[vertIdx++];
// Gather out edges of this vertex
for (V3GraphEdge* edgep = vtxp->outBeginp(); edgep; edgep = edgep->outNextp()) {
// Don't add edges leading to vertices we already visited. This is a highly
// connected graph, so this greatly reduces the cost of maintaining the pending
// set.
if (edgep->top()->user() == VertexState::MST_VISITED) continue;
newEdgesp->push_back(edgep);
}
// If no relevant out edges, then we are done
if (newEdgesp->empty()) return;
// Sort new edge list
std::sort(newEdgesp->begin(), newEdgesp->end(), edgeCmp);
// Add edge list to pending set
pendingEdgeListps.insert(newEdgesp);
};
// To start, choose an arbitrary vertex and visit it.
visit(verticesBeginp());
// Repeatedly find the least costly edge in the pending set. // Repeatedly find the least costly edge in the pending set.
// If it connects to an unvisited node, visit that node and update // If it connects to an unvisited node, visit that node and update
// the pending edge set. If it connects to an already visited node, // the pending edge set. If it connects to an already visited node,
// discard it and repeat again. // discard it and repeat again.
unsigned edges_made = 0; while (!pendingEdgeListps.empty()) {
while (!pendingEdges.empty()) { // Grab lowest cost edge list
const auto firstIt = pendingEdges.cbegin(); auto it = pendingEdgeListps.begin();
const V3GraphEdge* bestEdgep = *firstIt;
pendingEdges.erase(firstIt);
// bestEdgep->fromp() should be already seen // Grab lowest cost edge
Vertex* const from_vertexp = castVertexp(bestEdgep->fromp()); EdgeList* const bestEdgeListp = *it;
UASSERT(visited_set.find(from_vertexp) != visited_set.end(), "Can't find vertex"); const V3GraphEdge* const bestEdgep = bestEdgeListp->back();
// If the neighbor is not yet visited, visit it and add its edges // Remove the lowest cost edge list. We will remove its lowest cost element, and either
// to the pending set. // we are done with it (if it had a single element), in which case it will be discarded,
// or the cost of the new head element might be different, so we will need to re-insert
// it in the right place. In either case, it needs to be removed.
pendingEdgeListps.erase(it);
// If the lowest cost edge list is not a singleton list, then pop the lowest cost
// edge and re-insert the remaining edge list into the pending set.
if (bestEdgeListp->size() > 1) {
bestEdgeListp->pop_back();
pendingEdgeListps.insert(bestEdgeListp);
}
// Grab the target vertex
Vertex* const neighborp = castVertexp(bestEdgep->top()); Vertex* const neighborp = castVertexp(bestEdgep->top());
if (visited_set.find(neighborp) == visited_set.end()) {
const int bestCost = bestEdgep->weight(); // If the neighbour is not yet visited
UINFO(6, "bestCost = " << bestCost << " from " << from_vertexp->key() << " to " if (neighborp->user() == VertexState::CLEAR) {
<< neighborp->key() << endl); // Visit it
visit(neighborp);
// Create the edge in our output MST graph // Create the edge in our output MST graph
mstp->addEdge(from_vertexp->key(), neighborp->key(), bestCost); Vertex* const from_vertexp = castVertexp(bestEdgep->fromp());
edges_made++; mstp->addEdge(from_vertexp->key(), neighborp->key(), bestEdgep->weight());
// Mark this vertex as visited #if VL_DEBUG // Very hot loop, so only in debug
visited_set.insert(neighborp); UASSERT(from_vertexp->user() == MST_VISITED,
"bestEdgep->fromp() should be already seen");
// Update the pending edges with new edges #endif
for (V3GraphEdge* edgep = neighborp->outBeginp(); edgep;
edgep = edgep->outNextp()) {
pendingEdges.insert(edgep);
}
} else {
UINFO(6,
"Discarding edge to already-visited neighbor " << neighborp->key() << endl);
} }
} }
UASSERT(edges_made + 1 == vertCount, "Algorithm failed"); UASSERT(vertIdx == vertCount, "Should have visited all vertices");
UASSERT(visited_set.size() == vertCount, "Algorithm failed");
} }
// Populate *outp with a minimal perfect matching of *this. // Populate *outp with a minimal perfect matching of *this.
@ -219,16 +253,14 @@ public:
void perfectMatching(const std::vector<T_Key>& oddKeys, TspGraphTmpl* outp) { void perfectMatching(const std::vector<T_Key>& oddKeys, TspGraphTmpl* outp) {
UASSERT(outp->empty(), "Output graph must start empty"); UASSERT(outp->empty(), "Output graph must start empty");
std::list<Vertex*> odds = keysToVertexList(oddKeys); const std::list<Vertex*>& odds = keysToVertexList(oddKeys);
std::unordered_set<Vertex*> unmatchedOdds;
using VertexListIt = typename std::list<Vertex*>::iterator;
for (VertexListIt it = odds.begin(); it != odds.end(); ++it) {
outp->addVertex((*it)->key());
unmatchedOdds.insert(*it);
}
UASSERT(odds.size() % 2 == 0, "number of odd-order nodes should be even"); UASSERT(odds.size() % 2 == 0, "number of odd-order nodes should be even");
for (Vertex* const vtxp : odds) {
outp->addVertex(vtxp->key());
vtxp->user(VertexState::UNMATCHED_ODD);
}
// TODO: The true Christofides algorithm calls for minimum-weight // TODO: The true Christofides algorithm calls for minimum-weight
// perfect matching. Instead, we have a simple greedy algorithm // perfect matching. Instead, we have a simple greedy algorithm
// which might get close to the minimum, maybe, with luck? // which might get close to the minimum, maybe, with luck?
@ -241,46 +273,54 @@ public:
// ----- // -----
// Reuse the comparator from Prim's routine. The logic is the same // Gather and sort all edges. We use a vector then sort, because this is faster than a
// here. Note that the two V3GraphEdge's representing a single // sorted set. Reuse the comparator from Prim's routine (note it is a 'greater', not a
// bidir edge will collide in the pendingEdges set here, but this // 'lesser' comparator). The logic is the same here.
// is OK, we'll ignore the direction on the edge anyway. //
EdgeCmp cmp; // Note that there are two V3GraphEdge's representing a single bidir edge. While we could
using PendingEdgeSet = std::set<V3GraphEdge*, EdgeCmp&>; // just add both to the pending list and get the same result, we will only add one (based
PendingEdgeSet pendingEdges(cmp); // on fast pointer comparison - this still yields deterministic results), in order to
// reduce the size of the working set.
std::vector<V3GraphEdge*> pendingEdges;
for (VertexListIt it = odds.begin(); it != odds.end(); ++it) { for (Vertex* const fromp : odds) {
for (V3GraphEdge* edgep = (*it)->outBeginp(); edgep; edgep = edgep->outNextp()) { for (V3GraphEdge* edgep = fromp->outBeginp(); edgep; edgep = edgep->outNextp()) {
pendingEdges.insert(edgep); Vertex* const top = castVertexp(edgep->top());
// There are two edges (in both directions) between these two vertices. Keep one.
if (fromp > top) continue;
// We only care about edges between the odd-order vertices
if (top->user() != VertexState::UNMATCHED_ODD) continue;
// Add to candidate list
pendingEdges.push_back(edgep);
} }
} }
// Sort reverse iterators. This yields ascending order with a 'greater' comparator.
std::sort(pendingEdges.rbegin(), pendingEdges.rend(), edgeCmp);
// Iterate over all edges, in order from low to high cost. // Iterate over all edges, in order from low to high cost.
// For any edge whose ends are both odd-order vertices which // For any edge whose ends are both odd-order vertices which
// haven't been matched yet, match them. // haven't been matched yet, match them.
for (typename PendingEdgeSet::iterator it = pendingEdges.begin(); it != pendingEdges.end(); for (V3GraphEdge* const edgep : pendingEdges) {
++it) { Vertex* const fromp = castVertexp(edgep->fromp());
Vertex* const fromp = castVertexp((*it)->fromp()); Vertex* const top = castVertexp(edgep->top());
Vertex* const top = castVertexp((*it)->top()); if (fromp->user() == VertexState::UNMATCHED_ODD
if ((unmatchedOdds.find(fromp) != unmatchedOdds.end()) && top->user() == VertexState::UNMATCHED_ODD) {
&& (unmatchedOdds.find(top) != unmatchedOdds.end())) { outp->addEdge(fromp->key(), top->key(), edgep->weight());
outp->addEdge(fromp->key(), top->key(), (*it)->weight()); fromp->user(VertexState::CLEAR);
unmatchedOdds.erase(fromp); top->user(VertexState::CLEAR);
unmatchedOdds.erase(top);
} }
} }
UASSERT(unmatchedOdds.empty(), "Algorithm should have processed all vertices");
} }
void combineGraph(const TspGraphTmpl& g) { void combineGraph(const TspGraphTmpl& g) {
std::unordered_set<vluint32_t> edges_done; std::unordered_set<uint32_t> edges_done;
for (V3GraphVertex* vxp = g.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { for (V3GraphVertex* vxp = g.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
const Vertex* const fromp = castVertexp(vxp); const Vertex* const fromp = castVertexp(vxp);
for (V3GraphEdge* edgep = fromp->outBeginp(); edgep; edgep = edgep->outNextp()) { for (V3GraphEdge* edgep = fromp->outBeginp(); edgep; edgep = edgep->outNextp()) {
const Vertex* const top = castVertexp(edgep->top()); const Vertex* const top = castVertexp(edgep->top());
if (edges_done.find(edgep->user()) == edges_done.end()) { if (edges_done.insert(getEdgeId(edgep)).second) {
addEdge(fromp->key(), top->key(), edgep->weight()); addEdge(fromp->key(), top->key(), edgep->weight());
edges_done.insert(edgep->user());
} }
} }
} }
@ -298,7 +338,7 @@ public:
// Look for an arbitrary edge we've not yet marked // Look for an arbitrary edge we've not yet marked
for (V3GraphEdge* edgep = cur_vertexp->outBeginp(); edgep; edgep = edgep->outNextp()) { for (V3GraphEdge* edgep = cur_vertexp->outBeginp(); edgep; edgep = edgep->outNextp()) {
const vluint32_t edgeId = edgep->user(); const vluint32_t edgeId = getEdgeId(edgep);
if (markedEdgesp->end() == markedEdgesp->find(edgeId)) { if (markedEdgesp->end() == markedEdgesp->find(edgeId)) {
// This edge is not yet marked, so follow it. // This edge is not yet marked, so follow it.
markedEdgesp->insert(edgeId); markedEdgesp->insert(edgeId);
@ -322,7 +362,7 @@ public:
recursed = false; recursed = false;
// Look for an arbitrary edge at vxp we've not yet marked // Look for an arbitrary edge at vxp we've not yet marked
for (V3GraphEdge* edgep = vxp->outBeginp(); edgep; edgep = edgep->outNextp()) { for (V3GraphEdge* edgep = vxp->outBeginp(); edgep; edgep = edgep->outNextp()) {
const vluint32_t edgeId = edgep->user(); const vluint32_t edgeId = getEdgeId(edgep);
if (markedEdgesp->end() == markedEdgesp->find(edgeId)) { if (markedEdgesp->end() == markedEdgesp->find(edgeId)) {
UINFO(6, "Recursing.\n"); UINFO(6, "Recursing.\n");
findEulerTourRecurse(markedEdgesp, vxp, sortedOutp); findEulerTourRecurse(markedEdgesp, vxp, sortedOutp);
@ -348,7 +388,7 @@ public:
os << " " << tspvp->key() << '\n'; os << " " << tspvp->key() << '\n';
for (V3GraphEdge* edgep = tspvp->outBeginp(); edgep; edgep = edgep->outNextp()) { for (V3GraphEdge* edgep = tspvp->outBeginp(); edgep; edgep = edgep->outNextp()) {
const Vertex* const neighborp = castVertexp(edgep->top()); const Vertex* const neighborp = castVertexp(edgep->top());
os << " has edge " << edgep->user() << " to " << neighborp->key() << '\n'; os << " has edge " << getEdgeId(edgep) << " to " << neighborp->key() << '\n';
} }
} }
} }