diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index f63124957..0bff6bde5 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -2010,12 +2010,32 @@ class PartPackMTasks; // (attributes). class ThreadSchedule final { public: + // CONSTANTS + static constexpr uint32_t UNASSIGNED = 0xffffffff; + + // TYPES + struct MTaskState { + uint32_t completionTime = 0; // Estimated time this mtask will complete + uint32_t threadId = UNASSIGNED; // Thread id this MTask is assigned to + const ExecMTask* nextp = nullptr; // Next MTask on same thread after this + }; + + // MEMBERS // Allocation of sequence of MTasks to threads. Can be considered a map from thread ID to // the sequence of MTasks to be executed by that thread. std::vector<std::vector<const ExecMTask*>> threads; - // Map from MTask to ID of thread it is assigned to. - std::unordered_map<const ExecMTask*, uint32_t> threadId; + // State for each mtask. + std::unordered_map<const ExecMTask*, MTaskState> mtaskState; + + uint32_t threadId(const ExecMTask* mtaskp) const { + const auto& it = mtaskState.find(mtaskp); + if (it != mtaskState.end()) { + return it->second.threadId; + } else { + return UNASSIGNED; + } + } private: friend class PartPackMTasks; @@ -2026,20 +2046,104 @@ private: ThreadSchedule(ThreadSchedule&&) = default; ThreadSchedule& operator=(ThreadSchedule&&) = default; + // Debugging + void dumpDotFile(const string& filename) const; + void dumpDotFilePrefixedAlways(const string& nameComment) const; + public: // Returns the number of cross-thread dependencies of the given MTask. If > 0, the MTask must // test whether its dependencies are ready before starting, and therefore may need to block. 
uint32_t crossThreadDependencies(const ExecMTask* mtaskp) const { - const uint32_t thisThreadId = threadId.at(mtaskp); + const uint32_t thisThreadId = threadId(mtaskp); uint32_t result = 0; for (V3GraphEdge* edgep = mtaskp->inBeginp(); edgep; edgep = edgep->inNextp()) { const ExecMTask* const prevp = dynamic_cast<const ExecMTask*>(edgep->fromp()); - if (threadId.at(prevp) != thisThreadId) ++result; + if (threadId(prevp) != thisThreadId) ++result; } return result; } + + uint32_t startTime(const ExecMTask* mtaskp) const { + return mtaskState.at(mtaskp).completionTime - mtaskp->cost(); + } + uint32_t endTime(const ExecMTask* mtaskp) const { + return mtaskState.at(mtaskp).completionTime; + } }; +//! Variant of dumpDotFilePrefixed without --dump option check +void ThreadSchedule::dumpDotFilePrefixedAlways(const string& nameComment) const { + dumpDotFile(v3Global.debugFilename(nameComment) + ".dot"); +} + +void ThreadSchedule::dumpDotFile(const string& filename) const { + // This generates a file used by graphviz, https://www.graphviz.org + const std::unique_ptr<std::ofstream> logp(V3File::new_ofstream(filename)); + if (logp->fail()) v3fatal("Can't write " << filename); + auto* depGraph = v3Global.rootp()->execGraphp()->depGraphp(); + + // Header + *logp << "digraph v3graph {\n"; + *logp << " graph[layout=\"neato\" labelloc=t labeljust=l label=\"" << filename << "\"]\n"; + *logp << " node[shape=\"rect\" ratio=\"fill\" fixedsize=true]\n"; + + // Thread labels + *logp << "\n // Threads\n"; + const int threadBoxWidth = 2; + for (int i = 0; i < v3Global.opt.threads(); i++) { + *logp << " t" << i << " [label=\"Thread " << i << "\" width=" << threadBoxWidth + << " pos=\"" << (-threadBoxWidth / 2) << "," << -i + << "!\" style=\"filled\" fillcolor=\"grey\"] \n"; + } + + // MTask nodes + *logp << "\n // MTasks\n"; + + // Find minimum cost MTask for scaling MTask node widths + uint32_t minCost = UINT32_MAX; + for (const V3GraphVertex* vxp = depGraph->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { + if 
(const ExecMTask* mtaskp = dynamic_cast<const ExecMTask*>(vxp)) { + minCost = minCost > mtaskp->cost() ? mtaskp->cost() : minCost; + } + } + const double minWidth = 2.0; + auto mtaskXPos = [&](const ExecMTask* mtaskp, const double nodeWidth) { + const double startPosX = (minWidth * startTime(mtaskp)) / minCost; + return nodeWidth / minWidth + startPosX; + }; + + auto emitMTask = [&](const ExecMTask* mtaskp) { + const int thread = threadId(mtaskp); + const double nodeWidth = minWidth * (static_cast<double>(mtaskp->cost()) / minCost); + const double x = mtaskXPos(mtaskp, nodeWidth); + const int y = -thread; + string label = "label=\"" + mtaskp->name() + " (" + cvtToStr(startTime(mtaskp)) + ":" + + std::to_string(endTime(mtaskp)) + ")" + "\""; + *logp << " " << mtaskp->name() << " [" << label << " width=" << nodeWidth << " pos=\"" + << x << "," << y << "!\"]\n"; + }; + + // Emit MTasks + for (const V3GraphVertex* vxp = depGraph->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { + if (const ExecMTask* mtaskp = dynamic_cast<const ExecMTask*>(vxp)) { emitMTask(mtaskp); } + } + + // Emit MTask dependency edges + *logp << "\n // MTask dependencies\n"; + for (const V3GraphVertex* vxp = depGraph->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { + if (const ExecMTask* mtaskp = dynamic_cast<const ExecMTask*>(vxp)) { + for (V3GraphEdge* edgep = mtaskp->outBeginp(); edgep; edgep = edgep->outNextp()) { + const V3GraphVertex* top = edgep->top(); + *logp << " " << vxp->name() << " -> " << top->name() << "\n"; + } + } + } + + // Trailer + *logp << "}\n"; + logp->close(); +} + //###################################################################### // PartPackMTasks @@ -2059,16 +2163,7 @@ public: // thread A checks the end time of an mtask running on thread B. This extra // "padding" avoids tight "layovers" at cross-thread dependencies. 
class PartPackMTasks final { - // CONSTANTS - static constexpr uint32_t UNASSIGNED = 0xffffffff; - // TYPES - struct MTaskState { - uint32_t completionTime = 0; // Estimated time this mtask will complete - uint32_t threadId = UNASSIGNED; // Thread id this MTask is assigned to - const ExecMTask* nextp = nullptr; // Next MTask on same thread after this - }; - struct MTaskCmp { bool operator()(const ExecMTask* ap, const ExecMTask* bp) const { return ap->id() < bp->id(); @@ -2080,8 +2175,6 @@ class PartPackMTasks final { const uint32_t m_sandbagNumerator; // Numerator padding for est runtime const uint32_t m_sandbagDenom; // Denominator padding for est runtime - std::unordered_map<const ExecMTask*, MTaskState> m_mtaskState; // State for each mtask. - public: // CONSTRUCTORS explicit PartPackMTasks(uint32_t nThreads = v3Global.opt.threads(), @@ -2093,9 +2186,10 @@ public: private: // METHODS - uint32_t completionTime(const ExecMTask* mtaskp, uint32_t threadId) { - const MTaskState& state = m_mtaskState[mtaskp]; - UASSERT(state.threadId != UNASSIGNED, "Mtask should have assigned thread"); + uint32_t completionTime(const ThreadSchedule& schedule, const ExecMTask* mtaskp, + uint32_t threadId) { + const ThreadSchedule::MTaskState& state = schedule.mtaskState.at(mtaskp); + UASSERT(state.threadId != ThreadSchedule::UNASSIGNED, "Mtask should have assigned thread"); if (threadId == state.threadId) { // No overhead on same thread return state.completionTime; @@ -2111,7 +2205,8 @@ private: // finishes, otherwise we get priority inversions and fail the self // test. 
if (state.nextp) { - const uint32_t successorEndTime = completionTime(state.nextp, state.threadId); + const uint32_t successorEndTime + = completionTime(schedule, state.nextp, state.threadId); if ((sandbaggedEndTime >= successorEndTime) && (successorEndTime > 1)) { sandbaggedEndTime = successorEndTime - 1; } @@ -2122,10 +2217,10 @@ private: return sandbaggedEndTime; } - bool isReady(const ExecMTask* mtaskp) { + bool isReady(ThreadSchedule& schedule, const ExecMTask* mtaskp) { for (V3GraphEdge* edgeInp = mtaskp->inBeginp(); edgeInp; edgeInp = edgeInp->inNextp()) { const ExecMTask* const prevp = dynamic_cast<const ExecMTask*>(edgeInp->fromp()); - if (m_mtaskState[prevp].threadId == UNASSIGNED) { + if (schedule.threadId(prevp) == ThreadSchedule::UNASSIGNED) { // This predecessor is not assigned yet return false; } @@ -2148,12 +2243,9 @@ public: // Build initial ready list for (V3GraphVertex* vxp = mtaskGraph.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp); - if (isReady(mtaskp)) readyMTasks.insert(mtaskp); + if (isReady(schedule, mtaskp)) readyMTasks.insert(mtaskp); } - // Clear algorithm state - m_mtaskState.clear(); - while (!readyMTasks.empty()) { // For each task in the ready set, compute when it might start // on each thread (in that thread's local time frame.) 
@@ -2172,7 +2264,7 @@ public: for (V3GraphEdge* edgep = mtaskp->inBeginp(); edgep; edgep = edgep->inNextp()) { const ExecMTask* const priorp = dynamic_cast<const ExecMTask*>(edgep->fromp()); - const uint32_t priorEndTime = completionTime(priorp, threadId); + const uint32_t priorEndTime = completionTime(schedule, priorp, threadId); if (priorEndTime > timeBegin) timeBegin = priorEndTime; } UINFO(6, "Task " << mtaskp->name() << " start at " << timeBegin @@ -2197,14 +2289,13 @@ public: // Update algorithm state const uint32_t bestEndTime = bestTime + bestMtaskp->cost(); - m_mtaskState[bestMtaskp].completionTime = bestEndTime; - m_mtaskState[bestMtaskp].threadId = bestThreadId; - if (!bestThread.empty()) { m_mtaskState[bestThread.back()].nextp = bestMtaskp; } + schedule.mtaskState[bestMtaskp].completionTime = bestEndTime; + schedule.mtaskState[bestMtaskp].threadId = bestThreadId; + if (!bestThread.empty()) { schedule.mtaskState[bestThread.back()].nextp = bestMtaskp; } busyUntil[bestThreadId] = bestEndTime; // Add the MTask to the schedule bestThread.push_back(bestMtaskp); - schedule.threadId[bestMtaskp] = bestThreadId; // Update the ready list const size_t erased = readyMTasks.erase(bestMtaskp); @@ -2213,18 +2304,20 @@ public: edgeOutp = edgeOutp->outNextp()) { const ExecMTask* const nextp = dynamic_cast<const ExecMTask*>(edgeOutp->top()); // Dependent MTask should not yet be assigned to a thread - UASSERT(m_mtaskState[nextp].threadId == UNASSIGNED, + UASSERT(schedule.threadId(nextp) == ThreadSchedule::UNASSIGNED, "Tasks after one being assigned should not be assigned yet"); // Dependent MTask should not be ready yet, since dependency is just being assigned UASSERT_OBJ(readyMTasks.find(nextp) == readyMTasks.end(), nextp, "Tasks after one being assigned should not be ready"); - if (isReady(nextp)) { + if (isReady(schedule, nextp)) { readyMTasks.insert(nextp); UINFO(6, "Inserted " << nextp->name() << " into ready\n"); } } } + if (debug() >= 4) schedule.dumpDotFilePrefixedAlways("schedule"); + return 
schedule; } @@ -2258,26 +2351,26 @@ public: UASSERT_SELFTEST(const ExecMTask*, schedule.threads[0][1], t1); UASSERT_SELFTEST(const ExecMTask*, schedule.threads[1][0], t2); - UASSERT_SELFTEST(size_t, schedule.threadId.size(), 3); + UASSERT_SELFTEST(size_t, schedule.mtaskState.size(), 3); - UASSERT_SELFTEST(uint32_t, schedule.threadId.at(t0), 0); - UASSERT_SELFTEST(uint32_t, schedule.threadId.at(t1), 0); - UASSERT_SELFTEST(uint32_t, schedule.threadId.at(t2), 1); + UASSERT_SELFTEST(uint32_t, schedule.threadId(t0), 0); + UASSERT_SELFTEST(uint32_t, schedule.threadId(t1), 0); + UASSERT_SELFTEST(uint32_t, schedule.threadId(t2), 1); // On its native thread, we see the actual end time for t0: - UASSERT_SELFTEST(uint32_t, packer.completionTime(t0, 0), 1000); + UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t0, 0), 1000); // On the other thread, we see a sandbagged end time which does not // exceed the t1 end time: - UASSERT_SELFTEST(uint32_t, packer.completionTime(t0, 1), 1099); + UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t0, 1), 1099); // Actual end time on native thread: - UASSERT_SELFTEST(uint32_t, packer.completionTime(t1, 0), 1100); + UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t1, 0), 1100); // Sandbagged end time seen on thread 1. Note it does not compound // with t0's sandbagged time; compounding caused trouble in // practice. 
- UASSERT_SELFTEST(uint32_t, packer.completionTime(t1, 1), 1130); - UASSERT_SELFTEST(uint32_t, packer.completionTime(t2, 0), 1229); - UASSERT_SELFTEST(uint32_t, packer.completionTime(t2, 1), 1199); + UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t1, 1), 1130); + UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t2, 0), 1229); + UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t2, 1), 1199); } private: @@ -2645,7 +2738,7 @@ static void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t th // For any dependent mtask that's on another thread, signal one dependency completion. for (V3GraphEdge* edgep = mtaskp->outBeginp(); edgep; edgep = edgep->outNextp()) { const ExecMTask* const nextp = dynamic_cast<const ExecMTask*>(edgep->top()); - if (schedule.threadId.at(nextp) != threadId) { + if (schedule.threadId(nextp) != threadId) { addStrStmt("vlSelf->__Vm_mtaskstate_" + cvtToStr(nextp->id()) + ".signalUpstreamDone(even_cycle);\n"); } @@ -2661,7 +2754,7 @@ static const std::vector<AstCFunc*> createThreadFunctions(const ThreadSchedule& // For each thread, create a function representing its entry point for (const std::vector<const ExecMTask*>& thread : schedule.threads) { if (thread.empty()) continue; - const uint32_t threadId = schedule.threadId.at(thread.front()); + const uint32_t threadId = schedule.threadId(thread.front()); string name = "__Vthread_"; name += cvtToStr(threadId); AstCFunc* const funcp = new AstCFunc(fl, name, nullptr, "void"); diff --git a/test_regress/t/t_dotfiles.pl b/test_regress/t/t_dotfiles.pl new file mode 100755 index 000000000..31b8ac837 --- /dev/null +++ b/test_regress/t/t_dotfiles.pl @@ -0,0 +1,36 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2021 by Wilson Snyder. 
This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(vltmt => 1); + +# Use a top file which we are sure to be parallelizable +top_filename("t/t_gen_alw.v"); + +compile( + v_flags2 => ["--debug --debugi 5 --threads 2"] + ); + +foreach my $dotname ("linkcells", "task_call", "gate_simp", "gate_opt", + "acyc_simp", "orderg_pre", "orderg_acyc", "orderg_order", "orderg_domain", + "ordermv_initial", "ordermv_hazards", "ordermv_contraction", + "ordermv_transitive1", "orderg_done", "ordermv_transitive2", "schedule") { + # Some files with identical prefix are generated multiple times during + # verilation. Ensure that at least one of each $dotname-prefixed file is generated. + @dotFiles = glob("$Self->{obj_dir}/*$dotname.dot"); + if (scalar @dotFiles == 0) { + error("Found no dotfiles with pattern *$dotname.dot"); + } + foreach my $dotFilename (@dotFiles) { + file_grep($dotFilename, qr/digraph v3graph/); + } +} + +ok(1); +1;