DfgPeephole: Use a work list driven algorithm for speed

Replace the 'run to fixed point' algorithm with a work list driven
approach. Instead of marking the graph as changed, we explicitly add
vertices to the work list, to be visited, when a vertex is changed. This
both improves memory locality (as the work list is processed in last in
first out order), and removes unnecessary visitations when only a few
nodes change.
This commit is contained in:
Geza Lore 2022-11-04 15:50:20 +00:00
parent 21926eeb6b
commit fb9ec03c3f
3 changed files with 251 additions and 166 deletions

View File

@ -375,6 +375,22 @@ public:
return *storagep;
}
// Store 'value' as this vertex's user data and mark that data as current.
// Only participates in overload resolution for types 'T' no larger than a pointer.
template <typename T>
typename std::enable_if<sizeof(T) <= sizeof(void*), void>::type setUser(T value) {
    // The payload must fit, and be suitably aligned, within the in-object storage
    static_assert(sizeof(T) <= sizeof(UserDataStorage),
                  "Size of user data type 'T' is too large for allocated storage");
    static_assert(alignof(T) <= alignof(UserDataStorage),
                  "Alignment of user data type 'T' is larger than allocated storage");
    // Pick up the owning graph's current user data counter
    const uint32_t currentUser = m_graphp->m_userCurrent;
#if VL_DEBUG
    // A zero counter is reported as use without reservation (checked in debug builds only)
    UASSERT_OBJ(currentUser, this, "DfgVertex user data used without reserving");
#endif
    // Stamp the vertex with the current counter, then write the value into the storage
    m_userCnt = currentUser;
    *reinterpret_cast<T*>(&m_userDataStorage) = value;
}
// Width of result
uint32_t width() const {
// This is a hot enough function that this is an expensive check, so in debug build only.

File diff suppressed because it is too large Load Diff

View File

@ -31,9 +31,6 @@ module t (
assign array[0] = (rand_a << 32) | (rand_a >> 32);
assign array[1] = (rand_a << 16) | (rand_a >> 48);
// x, but with evaluation slightly delayed in DfgPeephole
`define DFG(x) (&16'hffff ? (x) : (~x))
`signal(FOLD_UNARY_CLog2, $clog2(const_a));
`signal(FOLD_UNARY_CountOnes, $countones(const_a));
`signal(FOLD_UNARY_IsUnknown, $isunknown(const_a));
@ -150,8 +147,10 @@ module t (
`signal(REPLACE_CONCAT_SEL_BOTTOM_AND_ZERO_WITH_SHIFTL, {rand_a[1:0], 62'd0});
`signal(PUSH_CONCAT_THROUGH_NOTS, {~(rand_a+64'd101), ~(rand_b+64'd101)} );
`signal(REMOVE_CONCAT_OF_ADJOINING_SELS, {rand_a[10:3], rand_a[2:1]});
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS, {rand_a[10:3], `DFG({rand_a[2:1], rand_b})});
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS, {`DFG({rand_b, rand_a[10:3]}), rand_a[2:1]});
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS_CAT, {rand_a[2:1], rand_b});
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS_CAT, {rand_b, rand_a[10:3]});
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS, {rand_a[10:3], {rand_a[2:1], rand_b}});
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS, {{rand_b, rand_a[10:3]}, rand_a[2:1]});
`signal(REMOVE_COND_WITH_FALSE_CONDITION, 1'd0 ? rand_a : rand_b);
`signal(REMOVE_COND_WITH_TRUE_CONDITION, 1'd1 ? rand_a : rand_b);
`signal(SWAP_COND_WITH_NOT_CONDITION, (~rand_a[0] & 1'd1) ? rand_a : rand_b);