Internals: Add .dot graph visualization of ThreadSchedule (#3048)

* Move MTaskState to ThreadSchedule

MTaskState does not concern itself with sandbagging, and thus solely contains information related to the finalized schedule, i.e., completion time, thread ID and next MTask on thread.

* Add .dot graph visualization of ThreadSchedule

Follow-up to #2779.

This commit adds the creation of .dot files - used by GraphViz - to visualize how mtasks are statically scheduled across the set of specified threads.
We visualize each thread as a row, with the nodes of a row being the mtasks scheduled for the given thread. The width of each mtask node is proportional to its cost. MTask dependencies are shown as edges from the source mtask to the sink mtask.
This commit is contained in:
Morten Borup Petersen 2021-07-06 13:06:00 +02:00 committed by GitHub
parent 2ebed755e6
commit fd0446f481
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 173 additions and 44 deletions

View File

@ -2010,12 +2010,32 @@ class PartPackMTasks;
// (attributes).
class ThreadSchedule final {
public:
// CONSTANTS
static constexpr uint32_t UNASSIGNED = 0xffffffff;
// TYPES
// Per-MTask record of the schedule: when the mtask completes, which thread
// it is assigned to, and which mtask follows it on that same thread.
// A default-constructed state has no thread assigned (threadId == UNASSIGNED).
struct MTaskState {
uint32_t completionTime = 0; // Estimated time this mtask will complete
uint32_t threadId = UNASSIGNED; // Thread id this MTask is assigned to
const ExecMTask* nextp = nullptr; // Next MTask on same thread after this
};
// MEMBERS
// Allocation of sequence of MTasks to threads. Can be considered a map from thread ID to
// the sequence of MTasks to be executed by that thread.
std::vector<std::vector<const ExecMTask*>> threads;
// Map from MTask to ID of thread it is assigned to.
std::unordered_map<const ExecMTask*, uint32_t> threadId;
// State for each mtask.
std::unordered_map<const ExecMTask*, MTaskState> mtaskState;
// Return the id of the thread the given mtask is assigned to, or UNASSIGNED
// when the mtask has no entry in mtaskState.
uint32_t threadId(const ExecMTask* mtaskp) const {
    const auto found = mtaskState.find(mtaskp);
    // No recorded state: the mtask has not been placed on any thread
    if (found == mtaskState.end()) return UNASSIGNED;
    return found->second.threadId;
}
private:
friend class PartPackMTasks;
@ -2026,20 +2046,104 @@ private:
ThreadSchedule(ThreadSchedule&&) = default;
ThreadSchedule& operator=(ThreadSchedule&&) = default;
// Debugging
void dumpDotFile(const string& filename) const;
void dumpDotFilePrefixedAlways(const string& nameComment) const;
public:
// Returns the number of cross-thread dependencies of the given MTask. If > 0, the MTask must
// test whether its dependencies are ready before starting, and therefore may need to block.
uint32_t crossThreadDependencies(const ExecMTask* mtaskp) const {
const uint32_t thisThreadId = threadId.at(mtaskp);
const uint32_t thisThreadId = threadId(mtaskp);
uint32_t result = 0;
for (V3GraphEdge* edgep = mtaskp->inBeginp(); edgep; edgep = edgep->inNextp()) {
const ExecMTask* const prevp = dynamic_cast<ExecMTask*>(edgep->fromp());
if (threadId.at(prevp) != thisThreadId) ++result;
if (threadId(prevp) != thisThreadId) ++result;
}
return result;
}
// Start time of the given mtask: its recorded completion time less its cost.
// The mtask must have an entry in mtaskState (looked up with at()).
uint32_t startTime(const ExecMTask* mtaskp) const {
    const uint32_t finish = mtaskState.at(mtaskp).completionTime;
    return finish - mtaskp->cost();
}
uint32_t endTime(const ExecMTask* mtaskp) const {
return mtaskState.at(mtaskp).completionTime;
}
};
//! Variant of dumpDotFilePrefixed without --dump option check
void ThreadSchedule::dumpDotFilePrefixedAlways(const string& nameComment) const {
dumpDotFile(v3Global.debugFilename(nameComment) + ".dot");
}
// Write this schedule to 'filename' as a Graphviz .dot graph.
//
// Each thread is drawn as a grey box at the start of its own row (row y = -threadId).
// Every ExecMTask in the dependency graph becomes a rectangle on the row of its
// thread, with a width proportional to its cost and a horizontal position
// proportional to its start time. Dependencies between mtasks are emitted as edges.
//
// Raises a v3fatal if the output stream cannot be written.
void ThreadSchedule::dumpDotFile(const string& filename) const {
    // This generates a file used by graphviz, https://www.graphviz.org
    const std::unique_ptr<std::ofstream> logp(V3File::new_ofstream(filename));
    if (logp->fail()) v3fatal("Can't write " << filename);
    auto* depGraphp = v3Global.rootp()->execGraphp()->depGraphp();

    // Header
    *logp << "digraph v3graph {\n";
    *logp << " graph[layout=\"neato\" labelloc=t labeljust=l label=\"" << filename << "\"]\n";
    *logp << " node[shape=\"rect\" ratio=\"fill\" fixedsize=true]\n";

    // Thread labels
    *logp << "\n // Threads\n";
    const int threadBoxWidth = 2;
    for (int i = 0; i < v3Global.opt.threads(); i++) {
        // Box is centered half its width left of x = 0, where the mtask rows begin
        *logp << " t" << i << " [label=\"Thread " << i << "\" width=" << threadBoxWidth
              << " pos=\"" << (-threadBoxWidth / 2) << "," << -i
              << "!\" style=\"filled\" fillcolor=\"grey\"] \n";
    }

    // MTask nodes
    *logp << "\n // MTasks\n";

    // Find minimum cost MTask for scaling MTask node widths
    uint32_t minCost = UINT32_MAX;
    for (const V3GraphVertex* vxp = depGraphp->verticesBeginp(); vxp;
         vxp = vxp->verticesNextp()) {
        if (const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp)) {
            if (mtaskp->cost() < minCost) minCost = mtaskp->cost();
        }
    }
    // A zero minimum cost would turn the scaling divisions below into inf/nan
    // widths and positions in the emitted file; fall back to a unit scale.
    if (minCost == 0) minCost = 1;

    // Width given to the cheapest mtask; all others scale up from this
    const double minWidth = 2.0;
    auto mtaskXPos = [&](const ExecMTask* mtaskp, const double nodeWidth) {
        const double startPosX = (minWidth * startTime(mtaskp)) / minCost;
        // 'pos' places the node by its center, so shift right by half the width
        // to make the left edge of the node sit at the mtask's start time
        return nodeWidth / 2.0 + startPosX;
    };

    auto emitMTask = [&](const ExecMTask* mtaskp) {
        const int thread = threadId(mtaskp);
        const double nodeWidth = minWidth * (static_cast<double>(mtaskp->cost()) / minCost);
        const double x = mtaskXPos(mtaskp, nodeWidth);
        const int y = -thread;
        // Label is "<name> (<start>:<end>)"
        const string label = "label=\"" + mtaskp->name() + " (" + cvtToStr(startTime(mtaskp))
                             + ":" + cvtToStr(endTime(mtaskp)) + ")" + "\"";
        *logp << " " << mtaskp->name() << " [" << label << " width=" << nodeWidth << " pos=\""
              << x << "," << y << "!\"]\n";
    };

    // Emit MTasks
    for (const V3GraphVertex* vxp = depGraphp->verticesBeginp(); vxp;
         vxp = vxp->verticesNextp()) {
        if (const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp)) {
            emitMTask(mtaskp);
        }
    }

    // Emit MTask dependency edges
    *logp << "\n // MTask dependencies\n";
    for (const V3GraphVertex* vxp = depGraphp->verticesBeginp(); vxp;
         vxp = vxp->verticesNextp()) {
        if (const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp)) {
            for (V3GraphEdge* edgep = mtaskp->outBeginp(); edgep; edgep = edgep->outNextp()) {
                const V3GraphVertex* const top = edgep->top();
                *logp << " " << mtaskp->name() << " -> " << top->name() << "\n";
            }
        }
    }

    // Trailer
    *logp << "}\n";
    logp->close();
}
//######################################################################
// PartPackMTasks
@ -2059,16 +2163,7 @@ public:
// thread A checks the end time of an mtask running on thread B. This extra
// "padding" avoids tight "layovers" at cross-thread dependencies.
class PartPackMTasks final {
// CONSTANTS
static constexpr uint32_t UNASSIGNED = 0xffffffff;
// TYPES
struct MTaskState {
uint32_t completionTime = 0; // Estimated time this mtask will complete
uint32_t threadId = UNASSIGNED; // Thread id this MTask is assigned to
const ExecMTask* nextp = nullptr; // Next MTask on same thread after this
};
struct MTaskCmp {
bool operator()(const ExecMTask* ap, const ExecMTask* bp) const {
return ap->id() < bp->id();
@ -2080,8 +2175,6 @@ class PartPackMTasks final {
const uint32_t m_sandbagNumerator; // Numerator padding for est runtime
const uint32_t m_sandbagDenom; // Denominator padding for est runtime
std::unordered_map<const ExecMTask*, MTaskState> m_mtaskState; // State for each mtask.
public:
// CONSTRUCTORS
explicit PartPackMTasks(uint32_t nThreads = v3Global.opt.threads(),
@ -2093,9 +2186,10 @@ public:
private:
// METHODS
uint32_t completionTime(const ExecMTask* mtaskp, uint32_t threadId) {
const MTaskState& state = m_mtaskState[mtaskp];
UASSERT(state.threadId != UNASSIGNED, "Mtask should have assigned thread");
uint32_t completionTime(const ThreadSchedule& schedule, const ExecMTask* mtaskp,
uint32_t threadId) {
const ThreadSchedule::MTaskState& state = schedule.mtaskState.at(mtaskp);
UASSERT(state.threadId != ThreadSchedule::UNASSIGNED, "Mtask should have assigned thread");
if (threadId == state.threadId) {
// No overhead on same thread
return state.completionTime;
@ -2111,7 +2205,8 @@ private:
// finishes, otherwise we get priority inversions and fail the self
// test.
if (state.nextp) {
const uint32_t successorEndTime = completionTime(state.nextp, state.threadId);
const uint32_t successorEndTime
= completionTime(schedule, state.nextp, state.threadId);
if ((sandbaggedEndTime >= successorEndTime) && (successorEndTime > 1)) {
sandbaggedEndTime = successorEndTime - 1;
}
@ -2122,10 +2217,10 @@ private:
return sandbaggedEndTime;
}
bool isReady(const ExecMTask* mtaskp) {
bool isReady(ThreadSchedule& schedule, const ExecMTask* mtaskp) {
for (V3GraphEdge* edgeInp = mtaskp->inBeginp(); edgeInp; edgeInp = edgeInp->inNextp()) {
const ExecMTask* const prevp = dynamic_cast<ExecMTask*>(edgeInp->fromp());
if (m_mtaskState[prevp].threadId == UNASSIGNED) {
if (schedule.threadId(prevp) == ThreadSchedule::UNASSIGNED) {
// This predecessor is not assigned yet
return false;
}
@ -2148,12 +2243,9 @@ public:
// Build initial ready list
for (V3GraphVertex* vxp = mtaskGraph.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
const ExecMTask* const mtaskp = dynamic_cast<ExecMTask*>(vxp);
if (isReady(mtaskp)) readyMTasks.insert(mtaskp);
if (isReady(schedule, mtaskp)) readyMTasks.insert(mtaskp);
}
// Clear algorithm state
m_mtaskState.clear();
while (!readyMTasks.empty()) {
// For each task in the ready set, compute when it might start
// on each thread (in that thread's local time frame.)
@ -2172,7 +2264,7 @@ public:
for (V3GraphEdge* edgep = mtaskp->inBeginp(); edgep;
edgep = edgep->inNextp()) {
const ExecMTask* const priorp = dynamic_cast<ExecMTask*>(edgep->fromp());
const uint32_t priorEndTime = completionTime(priorp, threadId);
const uint32_t priorEndTime = completionTime(schedule, priorp, threadId);
if (priorEndTime > timeBegin) timeBegin = priorEndTime;
}
UINFO(6, "Task " << mtaskp->name() << " start at " << timeBegin
@ -2197,14 +2289,13 @@ public:
// Update algorithm state
const uint32_t bestEndTime = bestTime + bestMtaskp->cost();
m_mtaskState[bestMtaskp].completionTime = bestEndTime;
m_mtaskState[bestMtaskp].threadId = bestThreadId;
if (!bestThread.empty()) { m_mtaskState[bestThread.back()].nextp = bestMtaskp; }
schedule.mtaskState[bestMtaskp].completionTime = bestEndTime;
schedule.mtaskState[bestMtaskp].threadId = bestThreadId;
if (!bestThread.empty()) { schedule.mtaskState[bestThread.back()].nextp = bestMtaskp; }
busyUntil[bestThreadId] = bestEndTime;
// Add the MTask to the schedule
bestThread.push_back(bestMtaskp);
schedule.threadId[bestMtaskp] = bestThreadId;
// Update the ready list
const size_t erased = readyMTasks.erase(bestMtaskp);
@ -2213,18 +2304,20 @@ public:
edgeOutp = edgeOutp->outNextp()) {
const ExecMTask* const nextp = dynamic_cast<ExecMTask*>(edgeOutp->top());
// Dependent MTask should not yet be assigned to a thread
UASSERT(m_mtaskState[nextp].threadId == UNASSIGNED,
UASSERT(schedule.threadId(nextp) == ThreadSchedule::UNASSIGNED,
"Tasks after one being assigned should not be assigned yet");
// Dependent MTask should not be ready yet, since dependency is just being assigned
UASSERT_OBJ(readyMTasks.find(nextp) == readyMTasks.end(), nextp,
"Tasks after one being assigned should not be ready");
if (isReady(nextp)) {
if (isReady(schedule, nextp)) {
readyMTasks.insert(nextp);
UINFO(6, "Inserted " << nextp->name() << " into ready\n");
}
}
}
if (debug() >= 4) schedule.dumpDotFilePrefixedAlways("schedule");
return schedule;
}
@ -2258,26 +2351,26 @@ public:
UASSERT_SELFTEST(const ExecMTask*, schedule.threads[0][1], t1);
UASSERT_SELFTEST(const ExecMTask*, schedule.threads[1][0], t2);
UASSERT_SELFTEST(size_t, schedule.threadId.size(), 3);
UASSERT_SELFTEST(size_t, schedule.mtaskState.size(), 3);
UASSERT_SELFTEST(uint32_t, schedule.threadId.at(t0), 0);
UASSERT_SELFTEST(uint32_t, schedule.threadId.at(t1), 0);
UASSERT_SELFTEST(uint32_t, schedule.threadId.at(t2), 1);
UASSERT_SELFTEST(uint32_t, schedule.threadId(t0), 0);
UASSERT_SELFTEST(uint32_t, schedule.threadId(t1), 0);
UASSERT_SELFTEST(uint32_t, schedule.threadId(t2), 1);
// On its native thread, we see the actual end time for t0:
UASSERT_SELFTEST(uint32_t, packer.completionTime(t0, 0), 1000);
UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t0, 0), 1000);
// On the other thread, we see a sandbagged end time which does not
// exceed the t1 end time:
UASSERT_SELFTEST(uint32_t, packer.completionTime(t0, 1), 1099);
UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t0, 1), 1099);
// Actual end time on native thread:
UASSERT_SELFTEST(uint32_t, packer.completionTime(t1, 0), 1100);
UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t1, 0), 1100);
// Sandbagged end time seen on thread 1. Note it does not compound
// with t0's sandbagged time; compounding caused trouble in
// practice.
UASSERT_SELFTEST(uint32_t, packer.completionTime(t1, 1), 1130);
UASSERT_SELFTEST(uint32_t, packer.completionTime(t2, 0), 1229);
UASSERT_SELFTEST(uint32_t, packer.completionTime(t2, 1), 1199);
UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t1, 1), 1130);
UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t2, 0), 1229);
UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t2, 1), 1199);
}
private:
@ -2645,7 +2738,7 @@ static void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t th
// For any dependent mtask that's on another thread, signal one dependency completion.
for (V3GraphEdge* edgep = mtaskp->outBeginp(); edgep; edgep = edgep->outNextp()) {
const ExecMTask* const nextp = dynamic_cast<ExecMTask*>(edgep->top());
if (schedule.threadId.at(nextp) != threadId) {
if (schedule.threadId(nextp) != threadId) {
addStrStmt("vlSelf->__Vm_mtaskstate_" + cvtToStr(nextp->id())
+ ".signalUpstreamDone(even_cycle);\n");
}
@ -2661,7 +2754,7 @@ static const std::vector<AstCFunc*> createThreadFunctions(const ThreadSchedule&
// For each thread, create a function representing its entry point
for (const std::vector<const ExecMTask*>& thread : schedule.threads) {
if (thread.empty()) continue;
const uint32_t threadId = schedule.threadId.at(thread.front());
const uint32_t threadId = schedule.threadId(thread.front());
string name = "__Vthread_";
name += cvtToStr(threadId);
AstCFunc* const funcp = new AstCFunc(fl, name, nullptr, "void");

36
test_regress/t/t_dotfiles.pl Executable file
View File

@ -0,0 +1,36 @@
#!/usr/bin/env perl
if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
#
# Copyright 2021 by Wilson Snyder. This program is free software; you
# can redistribute it and/or modify it under the terms of either the GNU
# Lesser General Public License Version 3 or the Perl Artistic License
# Version 2.0.
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
scenarios(vltmt => 1);

# Use a top file which we are sure to be parallelizable
top_filename("t/t_gen_alw.v");

# Debug level 5 is high enough that the graph dumps checked below get written,
# and two threads are requested so that a thread schedule is produced
compile(
    v_flags2 => ["--debug --debugi 5 --threads 2"]
    );

# Name suffix of every .dot dump that is expected in the object directory
foreach my $dotname ("linkcells", "task_call", "gate_simp", "gate_opt",
                     "acyc_simp", "orderg_pre", "orderg_acyc", "orderg_order", "orderg_domain",
                     "ordermv_initial", "ordermv_hazards", "ordermv_contraction",
                     "ordermv_transitive1", "orderg_done", "ordermv_transitive2", "schedule") {
    # Some files with identical prefix are generated multiple times during
    # verilation. Ensure that at least one of each $dotname-prefixed file is generated.
    # Lexically scoped so matches from one iteration cannot leak into the next
    my @dotFiles = glob("$Self->{obj_dir}/*$dotname.dot");
    if (scalar @dotFiles == 0) {
        error("Found no dotfiles with pattern *$dotname.dot");
    }
    # Every dump must at least carry the Graphviz graph header
    foreach my $dotFilename (@dotFiles) {
        file_grep($dotFilename, qr/digraph v3graph/);
    }
}

ok(1);
1;