From f4038e36743bfb5d5860293520df781deedfe598 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 12 Jul 2022 11:41:15 +0100 Subject: [PATCH] Move thread pool and execution profiler into the context. (#3477) Fixes #3454 --- include/verilated.cpp | 58 +++++++++ include/verilated.h | 48 +++++++- include/verilated_profiler.cpp | 43 +++++-- include/verilated_profiler.h | 17 +-- include/verilated_threads.cpp | 54 ++++----- include/verilated_threads.h | 32 ++--- include/verilated_trace.h | 22 ++-- include/verilated_trace_imp.h | 133 ++++++++++----------- src/V3EmitCHeaders.cpp | 6 +- src/V3EmitCMake.cpp | 2 +- src/V3EmitCModel.cpp | 31 +++-- src/V3EmitCSyms.cpp | 24 ++-- src/V3EmitMk.cpp | 2 +- src/V3Trace.cpp | 9 +- test_regress/driver.pl | 8 +- test_regress/t/t_embed1.pl | 3 +- test_regress/t/t_gantt_two.cpp | 43 +++++++ test_regress/t/t_gantt_two.pl | 61 ++++++++++ test_regress/t/t_hier_block_cmake/main.cpp | 5 +- test_regress/t/t_lib_prot_shared.pl | 3 +- test_regress/t/t_threads_crazy.pl | 12 +- test_regress/t/t_threads_crazy_context.pl | 36 ++++++ test_regress/t/t_wrapper_context.cpp | 2 + 23 files changed, 470 insertions(+), 184 deletions(-) create mode 100644 test_regress/t/t_gantt_two.cpp create mode 100755 test_regress/t/t_gantt_two.pl create mode 100755 test_regress/t/t_threads_crazy_context.pl diff --git a/include/verilated.cpp b/include/verilated.cpp index da8599a42..cf1d76d8f 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -66,6 +66,10 @@ #if defined(_WIN32) || defined(__MINGW32__) # include // mkdir #endif + +#ifdef VL_THREADED +# include "verilated_threads.h" +#endif // clang-format on // Max characters in static char string for VL_VALUE_STRING @@ -2428,6 +2432,33 @@ const char* VerilatedContext::timeprecisionString() const VL_MT_SAFE { return vl_time_str(timeprecision()); } +void VerilatedContext::threads(unsigned n) { + if (n == 0) VL_FATAL_MT(__FILE__, __LINE__, "", "%Error: Simulation threads must be >= 1"); + + if (m_threadPool) { + 
VL_FATAL_MT( + __FILE__, __LINE__, "", + "%Error: Cannot set simulation threads after the thread pool has been created."); + } + +#if VL_THREADED + if (m_threads == n) return; // To avoid unnecessary warnings + m_threads = n; + const unsigned hardwareThreadsAvailable = std::thread::hardware_concurrency(); + if (m_threads > hardwareThreadsAvailable) { + VL_PRINTF_MT("%%Warning: System has %u hardware threads but simulation thread count set " + "to %u. This will likely cause significant slowdown.\n", + hardwareThreadsAvailable, m_threads); + } +#else + if (n > 1) { + VL_PRINTF_MT("%%Warning: Verilator run-time library built without VL_THREADS. Ignoring " + "call to 'VerilatedContext::threads' with argument %u.\n", + n); + } +#endif +} + void VerilatedContext::commandArgs(int argc, const char** argv) VL_MT_SAFE_EXCLUDES(m_argMutex) { const VerilatedLockGuard lock{m_argMutex}; m_args.m_argVec.clear(); // Empty first, then add @@ -2458,6 +2489,33 @@ void VerilatedContext::internalsDump() const VL_MT_SAFE { VerilatedImp::userDump(); } +void VerilatedContext::addModel(VerilatedModel* modelp) { + threadPoolp(); // Ensure thread pool is created, so m_threads cannot change any more + + if (modelp->threads() > m_threads) { + std::ostringstream msg; + msg << "VerilatedContext has " << m_threads << " threads but model '" + << modelp->modelName() << "' (instantiated as '" << modelp->hierName() + << "') was Verilated with --threads " << modelp->threads() << ".\n"; + const std::string str = msg.str(); + VL_FATAL_MT(__FILE__, __LINE__, modelp->hierName(), str.c_str()); + } +} + +VerilatedVirtualBase* VerilatedContext::threadPoolp() { + if (m_threads == 1) return nullptr; +#if VL_THREADED + if (!m_threadPool) m_threadPool.reset(new VlThreadPool{this, m_threads - 1}); +#endif + return m_threadPool.get(); +} + +VerilatedVirtualBase* +VerilatedContext::enableExecutionProfiler(VerilatedVirtualBase* (*construct)(VerilatedContext&)) { + if (!m_executionProfiler) 
m_executionProfiler.reset(construct(*this)); + return m_executionProfiler.get(); +} + //====================================================================== // VerilatedContextImp:: Methods - command line diff --git a/include/verilated.h b/include/verilated.h index f9cf79601..bc1d5a3f2 100644 --- a/include/verilated.h +++ b/include/verilated.h @@ -252,6 +252,28 @@ public: #endif }; +//========================================================================= +/// Base class of a Verilator generated (Verilated) model. +/// +/// VerilatedModel is a base class of the user facing primary class generated +/// by Verilator. + +class VerilatedModel VL_NOT_FINAL { + VL_UNCOPYABLE(VerilatedModel); + +protected: + explicit VerilatedModel() = default; + virtual ~VerilatedModel() = default; + +public: + /// Returns the hierarchical name of this module instance. + virtual const char* hierName() = 0; + /// Returns the name of this model (the name of the generated model class). + virtual const char* modelName() = 0; + /// Returns the thread level parallelism, this model was Verilated with. Always 1 or higher. + virtual unsigned threads() = 0; +}; + //========================================================================= /// Base class for all Verilated module classes. @@ -266,10 +288,6 @@ public: const char* name() const { return m_namep; } ///< Return name of module }; -/// Declare a module, ala SC_MODULE -#define VL_MODULE(modname) class modname VL_NOT_FINAL : public VerilatedModule -// Not class final in VL_MODULE, as users might be abstracting our models (--hierarchical) - //========================================================================= // Functions overridable by user defines // (Internals however must use VL_PRINTF_MT, which calls these.) 
@@ -362,6 +380,16 @@ protected: // Implementation details const std::unique_ptr m_impdatap; + // Number of threads to use for simulation (size of m_threadPool + 1 for main thread) +#ifdef VL_THREADED + unsigned m_threads = std::thread::hardware_concurrency(); +#else + const unsigned m_threads = 1; +#endif + // The thread pool shared by all models added to this context + std::unique_ptr m_threadPool; + // The execution profiler shared by all models added to this context + std::unique_ptr m_executionProfiler; // Coverage access std::unique_ptr m_coveragep; // Pointer for coveragep() @@ -495,6 +523,12 @@ public: /// Get time precision as IEEE-standard text const char* timeprecisionString() const VL_MT_SAFE; + /// Get number of threads used for simulation (including the main thread) + unsigned threads() const { return m_threads; } + /// Set number of threads used for simulation (including the main thread) + /// Can only be called before the thread pool is created (before first model is added). 
+ void threads(unsigned n); + /// Allow traces to at some point be enabled (disables some optimizations) void traceEverOn(bool flag) VL_MT_SAFE { if (flag) calcUnusedSigs(true); @@ -517,6 +551,12 @@ public: // But for internal use only return reinterpret_cast(this); } + void addModel(VerilatedModel*); + + VerilatedVirtualBase* threadPoolp(); + VerilatedVirtualBase* + enableExecutionProfiler(VerilatedVirtualBase* (*construct)(VerilatedContext&)); + // Internal: $dumpfile void dumpfile(const std::string& flag) VL_MT_SAFE_EXCLUDES(m_timeDumpMutex); std::string dumpfile() const VL_MT_SAFE_EXCLUDES(m_timeDumpMutex); diff --git a/include/verilated_profiler.cpp b/include/verilated_profiler.cpp index 21246827a..d65442f44 100644 --- a/include/verilated_profiler.cpp +++ b/include/verilated_profiler.cpp @@ -66,41 +66,66 @@ template static size_t roundUptoMultipleOf(size_t value) { return (value + mask) & ~mask; } -VlExecutionProfiler::VlExecutionProfiler() { +VlExecutionProfiler::VlExecutionProfiler(VerilatedContext& context) + : m_context{context} { // Setup profiling on main thread setupThread(0); } -void VlExecutionProfiler::configure(const VerilatedContext& context) { +void VlExecutionProfiler::configure() { + if (VL_UNLIKELY(m_enabled)) { --m_windowCount; - if (VL_UNLIKELY(m_windowCount == context.profExecWindow())) { + if (VL_UNLIKELY(m_windowCount == m_context.profExecWindow())) { VL_DEBUG_IF(VL_DBG_MSGF("+ profile start collection\n");); clear(); // Clear the profile after the cache warm-up cycles. 
m_tickBegin = VL_CPU_TICK(); } else if (VL_UNLIKELY(m_windowCount == 0)) { const uint64_t tickEnd = VL_CPU_TICK(); VL_DEBUG_IF(VL_DBG_MSGF("+ profile end\n");); - const std::string& fileName = context.profExecFilename(); + const std::string& fileName = m_context.profExecFilename(); dump(fileName.c_str(), tickEnd); m_enabled = false; } return; } - const uint64_t startReq = context.profExecStart() + 1; // + 1, so we can start at time 0 + const uint64_t startReq = m_context.profExecStart() + 1; // + 1, so we can start at time 0 - if (VL_UNLIKELY(m_lastStartReq < startReq && VL_TIME_Q() >= context.profExecStart())) { + if (VL_UNLIKELY(m_lastStartReq < startReq && VL_TIME_Q() >= m_context.profExecStart())) { VL_DEBUG_IF(VL_DBG_MSGF("+ profile start warmup\n");); VL_DEBUG_IF(assert(m_windowCount == 0);); m_enabled = true; - m_windowCount = context.profExecWindow() * 2; + m_windowCount = m_context.profExecWindow() * 2; m_lastStartReq = startReq; } } -void VlExecutionProfiler::startWorkerSetup(VlExecutionProfiler* profilep, uint32_t threadId) { - profilep->setupThread(threadId); +VerilatedVirtualBase* VlExecutionProfiler::construct(VerilatedContext& context) { + VlExecutionProfiler* const selfp = new VlExecutionProfiler{context}; +#if VL_THREADED + if (VlThreadPool* const threadPoolp = static_cast(context.threadPoolp())) { + for (int i = 0; i < threadPoolp->numThreads(); ++i) { + // Data to pass to worker thread initialization + struct Data { + VlExecutionProfiler* const selfp; + const uint32_t threadId; + } data{selfp, static_cast(i + 1)}; + + // Initialize worker thread + threadPoolp->workerp(i)->addTask( + [](void* userp, bool) { + Data* const datap = static_cast(userp); + datap->selfp->setupThread(datap->threadId); + }, + &data); + + // Wait until initialization is complete + threadPoolp->workerp(i)->wait(); + } + } +#endif + return selfp; } void VlExecutionProfiler::setupThread(uint32_t threadId) { diff --git a/include/verilated_profiler.h
b/include/verilated_profiler.h index f85c95528..61f2813d3 100644 --- a/include/verilated_profiler.h +++ b/include/verilated_profiler.h @@ -33,13 +33,14 @@ #include class VlExecutionProfiler; +class VlThreadPool; //============================================================================= // Macros to simplify generated code #define VL_EXEC_TRACE_ADD_RECORD(vlSymsp) \ - if (VL_UNLIKELY((vlSymsp)->__Vm_executionProfiler.enabled())) \ - (vlSymsp)->__Vm_executionProfiler.addRecord() + if (VL_UNLIKELY((vlSymsp)->__Vm_executionProfilerp->enabled())) \ + (vlSymsp)->__Vm_executionProfilerp->addRecord() //============================================================================= // Return high-precision counter for profiling, or 0x0 if not available @@ -131,7 +132,7 @@ static_assert(std::is_trivially_destructible::value, //============================================================================= // VlExecutionProfiler is for collecting profiling data about model execution -class VlExecutionProfiler final { +class VlExecutionProfiler final : public VerilatedVirtualBase { // CONSTANTS // In order to try to avoid dynamic memory allocations during the actual profiling phase, @@ -149,6 +150,7 @@ class VlExecutionProfiler final { using ExecutionTrace = std::vector; // STATE + VerilatedContext& m_context; // The context this profiler is under static VL_THREAD_LOCAL ExecutionTrace t_trace; // thread-local trace buffers mutable VerilatedMutex m_mutex; // Map from thread id to &t_trace of given thread @@ -162,7 +164,8 @@ class VlExecutionProfiler final { public: // CONSTRUCTOR - VlExecutionProfiler(); + explicit VlExecutionProfiler(VerilatedContext& context); + virtual ~VlExecutionProfiler() = default; // METHODS @@ -174,7 +177,7 @@ public: return t_trace.back(); } // Configure profiler (called in beginning of 'eval') - void configure(const VerilatedContext&); + void configure(); // Setup profiling on a particular thread; void setupThread(uint32_t threadId); // Clear all 
profiling data @@ -182,8 +185,8 @@ public: // Write profiling data into file void dump(const char* filenamep, uint64_t tickEnd) VL_MT_SAFE_EXCLUDES(m_mutex); - // Called via VlStartWorkerCb in VlWorkerThread::startWorker - static void startWorkerSetup(VlExecutionProfiler* profilep, uint32_t threadId); + // Passed to VerilatedContext to create the VlExecutionProfiler profiler instance + static VerilatedVirtualBase* construct(VerilatedContext& context); }; //============================================================================= diff --git a/include/verilated_threads.cpp b/include/verilated_threads.cpp index a78ea9ae6..6696d738d 100644 --- a/include/verilated_threads.cpp +++ b/include/verilated_threads.cpp @@ -47,11 +47,9 @@ VlMTaskVertex::VlMTaskVertex(uint32_t upstreamDepCount) //============================================================================= // VlWorkerThread -VlWorkerThread::VlWorkerThread(uint32_t threadId, VerilatedContext* contextp, - VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) +VlWorkerThread::VlWorkerThread(VerilatedContext* contextp) : m_ready_size{0} - , m_cthread{startWorker, this, threadId, profilerp, startCb} - , m_contextp{contextp} {} + , m_cthread{startWorker, this, contextp} {} VlWorkerThread::~VlWorkerThread() { shutdown(); @@ -59,47 +57,49 @@ VlWorkerThread::~VlWorkerThread() { m_cthread.join(); } -void VlWorkerThread::shutdownTask(void*, bool) { +static void shutdownTask(void*, bool) { // Deliberately empty, we use the address of this function as a magic number } +void VlWorkerThread::shutdown() { addTask(shutdownTask, nullptr); } + +void VlWorkerThread::wait() { + // Enqueue a task that sets this flag. Execution is in-order so this ensures completion. 
+ std::atomic flag{false}; + addTask([](void* flagp, bool) { static_cast*>(flagp)->store(true); }, &flag); + // Spin wait + for (unsigned i = 0; i < VL_LOCK_SPINS; ++i) { + if (flag.load()) return; + VL_CPU_RELAX(); + } + // Yield wait + while (!flag.load()) std::this_thread::yield(); +} + void VlWorkerThread::workerLoop() { ExecRec work; + // Wait for the first task without spinning, in case the thread is never actually used. + dequeWork(&work); + while (true) { - dequeWork(&work); if (VL_UNLIKELY(work.m_fnp == shutdownTask)) break; work.m_fnp(work.m_selfp, work.m_evenCycle); + // Wait for next task with spinning. + dequeWork(&work); } } -void VlWorkerThread::startWorker(VlWorkerThread* workerp, uint32_t threadId, - VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) { - Verilated::threadContextp(workerp->m_contextp); - if (VL_UNLIKELY(startCb)) startCb(profilerp, threadId); +void VlWorkerThread::startWorker(VlWorkerThread* workerp, VerilatedContext* contextp) { + Verilated::threadContextp(contextp); workerp->workerLoop(); } //============================================================================= // VlThreadPool -VlThreadPool::VlThreadPool(VerilatedContext* contextp, int nThreads, - VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) { - // --threads N passes nThreads=N-1, as the "main" threads counts as 1 - ++nThreads; - const unsigned cpus = std::thread::hardware_concurrency(); - if (cpus < nThreads) { - static int warnedOnce = 0; - if (!warnedOnce++) { - VL_PRINTF_MT("%%Warning: System has %u CPUs but model Verilated with" - " --threads %d; may run slow.\n", - cpus, nThreads); - } - } - // Create worker threads - for (uint32_t threadId = 1; threadId < nThreads; ++threadId) { - m_workers.push_back(new VlWorkerThread{threadId, contextp, profilerp, startCb}); - } +VlThreadPool::VlThreadPool(VerilatedContext* contextp, unsigned nThreads) { + for (unsigned i = 0; i < nThreads; ++i) m_workers.push_back(new VlWorkerThread{contextp}); } 
VlThreadPool::~VlThreadPool() { diff --git a/include/verilated_threads.h b/include/verilated_threads.h index eeb8f9342..fdb45580e 100644 --- a/include/verilated_threads.h +++ b/include/verilated_threads.h @@ -60,9 +60,6 @@ using VlSelfP = void*; using VlExecFnp = void (*)(VlSelfP, bool); -// VlWorkerThread::startWorker callback, used to hook in VlExecutionProfiler -using VlStartWorkerCb = void (*)(VlExecutionProfiler*, uint32_t threadId); - // Track dependencies for a single MTask. class VlMTaskVertex final { // MEMBERS @@ -166,24 +163,23 @@ private: std::atomic m_ready_size; std::thread m_cthread; // Underlying C++ thread record - VerilatedContext* const m_contextp; // Context for spawned thread VL_UNCOPYABLE(VlWorkerThread); public: // CONSTRUCTORS - explicit VlWorkerThread(uint32_t threadId, VerilatedContext* contextp, - VlExecutionProfiler* profilerp, VlStartWorkerCb startCb); + explicit VlWorkerThread(VerilatedContext* contextp); ~VlWorkerThread(); // METHODS + template // inline void dequeWork(ExecRec* workp) VL_MT_SAFE_EXCLUDES(m_mutex) { // Spin for a while, waiting for new data - for (int i = 0; i < VL_LOCK_SPINS; ++i) { - if (VL_LIKELY(m_ready_size.load(std::memory_order_relaxed))) { // - break; + if VL_CONSTEXPR_CXX17 (SpinWait) { + for (unsigned i = 0; i < VL_LOCK_SPINS; ++i) { + if (VL_LIKELY(m_ready_size.load(std::memory_order_relaxed))) break; + VL_CPU_RELAX(); } - VL_CPU_RELAX(); } VerilatedLockGuard lock{m_mutex}; while (m_ready.empty()) { @@ -197,7 +193,7 @@ public: m_ready.erase(m_ready.begin()); m_ready_size.fetch_sub(1, std::memory_order_relaxed); } - inline void addTask(VlExecFnp fnp, VlSelfP selfp, bool evenCycle) + inline void addTask(VlExecFnp fnp, VlSelfP selfp, bool evenCycle = false) VL_MT_SAFE_EXCLUDES(m_mutex) { bool notify; { @@ -209,15 +205,14 @@ public: if (notify) m_cv.notify_one(); } - inline void shutdown() { addTask(shutdownTask, nullptr, false); } - static void shutdownTask(void*, bool); + void shutdown(); // Finish current 
tasks, then terminate thread + void wait(); // Blocks calling thread until all tasks complete in this thread void workerLoop(); - static void startWorker(VlWorkerThread* workerp, uint32_t threadId, - VlExecutionProfiler* profilerp, VlStartWorkerCb startCb); + static void startWorker(VlWorkerThread* workerp, VerilatedContext* contextp); }; -class VlThreadPool final { +class VlThreadPool final : public VerilatedVirtualBase { // MEMBERS std::vector m_workers; // our workers @@ -226,9 +221,8 @@ public: // Construct a thread pool with 'nThreads' dedicated threads. The thread // pool will create these threads and make them available to execute tasks // via this->workerp(index)->addTask(...) - VlThreadPool(VerilatedContext* contextp, int nThreads, VlExecutionProfiler* profilerp, - VlStartWorkerCb startCb); - ~VlThreadPool(); + VlThreadPool(VerilatedContext* contextp, unsigned nThreads); + virtual ~VlThreadPool(); // METHODS inline int numThreads() const { return m_workers.size(); } diff --git a/include/verilated_trace.h b/include/verilated_trace.h index 7915c3645..0d0f7c0f6 100644 --- a/include/verilated_trace.h +++ b/include/verilated_trace.h @@ -194,8 +194,6 @@ private: static void parallelWorkerTask(void*, bool); #endif - using ParallelCallbackMap = std::unordered_map>; - protected: uint32_t* m_sigs_oldvalp = nullptr; // Previous value store EData* m_sigs_enabledp = nullptr; // Bit vector of enabled codes (nullptr = all on) @@ -203,10 +201,10 @@ private: uint64_t m_timeLastDump = 0; // Last time we did a dump std::vector m_sigs_enabledVec; // Staging for m_sigs_enabledp std::vector m_initCbs; // Routines to initialize tracing - ParallelCallbackMap m_fullCbs; // Routines to perform full dump - ParallelCallbackMap m_chgCbs; // Routines to perform incremental dump + std::vector m_fullCbs; // Routines to perform full dump + std::vector m_chgCbs; // Routines to perform incremental dump std::vector m_cleanupCbs; // Routines to call at the end of dump - std::vector 
m_threadPoolps; // All thread pools, in insertion order + VerilatedContext* m_contextp = nullptr; // The context used by the traced models bool m_fullDump = true; // Whether a full dump is required on the next call to 'dump' uint32_t m_nextCode = 0; // Next code number to assign uint32_t m_numSignals = 0; // Number of distinct signals @@ -217,16 +215,16 @@ private: double m_timeRes = 1e-9; // Time resolution (ns/ms etc) double m_timeUnit = 1e-0; // Time units (ns/ms etc) - void addThreadPool(VlThreadPool* threadPoolp) VL_MT_SAFE_EXCLUDES(m_mutex); + void addContext(VerilatedContext*) VL_MT_SAFE_EXCLUDES(m_mutex); - void addCallbackRecord(std::vector& cbVec, CallbackRecord& cbRec) + void addCallbackRecord(std::vector& cbVec, CallbackRecord&& cbRec) VL_MT_SAFE_EXCLUDES(m_mutex); // Equivalent to 'this' but is of the sub-type 'T_Trace*'. Use 'self()->' // to access duck-typed functions to avoid a virtual function call. T_Trace* self() { return static_cast(this); } - void runParallelCallbacks(const ParallelCallbackMap& cbMap); + void runCallbacks(const std::vector& cbVec); // Flush any remaining data for this file static void onFlush(void* selfp) VL_MT_UNSAFE_ONE; @@ -341,10 +339,10 @@ public: //========================================================================= // Non-hot path internal interface to Verilator generated code - void addInitCb(initCb_t cb, void* userp) VL_MT_SAFE; - void addFullCb(dumpCb_t cb, void* userp, VlThreadPool* = nullptr) VL_MT_SAFE; - void addChgCb(dumpCb_t cb, void* userp, VlThreadPool* = nullptr) VL_MT_SAFE; - void addCleanupCb(cleanupCb_t cb, void* userp) VL_MT_SAFE; + void addInitCb(initCb_t cb, void* userp, VerilatedContext*) VL_MT_SAFE; + void addFullCb(dumpCb_t cb, void* userp, VerilatedContext*) VL_MT_SAFE; + void addChgCb(dumpCb_t cb, void* userp, VerilatedContext*) VL_MT_SAFE; + void addCleanupCb(cleanupCb_t cb, void* userp, VerilatedContext*) VL_MT_SAFE; void scopeEscape(char flag) { m_scopeEscape = flag; } diff --git 
a/include/verilated_trace_imp.h b/include/verilated_trace_imp.h index d2ffa965c..a09ac0f43 100644 --- a/include/verilated_trace_imp.h +++ b/include/verilated_trace_imp.h @@ -478,55 +478,52 @@ template <> VL_ATTR_NOINLINE void VerilatedTrace::ParallelWo #endif template <> -void VerilatedTrace::runParallelCallbacks(const ParallelCallbackMap& cbMap) { - for (VlThreadPool* threadPoolp : m_threadPoolps) { +void VerilatedTrace::runCallbacks(const std::vector& cbVec) { #ifdef VL_TRACE_PARALLEL - // If tracing in parallel, dispatch to the thread pool (if exists) - if (threadPoolp && threadPoolp->numThreads()) { - // List of work items for thread (std::list, as ParallelWorkerData is not movable) - std::list workerData; - // We use the whole pool + the main thread - const unsigned threads = threadPoolp->numThreads() + 1; - // Main thread executes all jobs with index % threads == 0 - std::vector mainThreadWorkerData; - // The tracing callbacks to execute on this thread-pool - const auto& cbVec = cbMap.at(threadPoolp); - // Enuque all the jobs - for (unsigned i = 0; i < cbVec.size(); ++i) { - const CallbackRecord& cbr = cbVec[i]; - // Always get the trace buffer on the main thread - Buffer* const bufp = getTraceBuffer(); - // Create new work item - workerData.emplace_back(cbr.m_dumpCb, cbr.m_userp, bufp); - // Grab the new work item - ParallelWorkerData* const itemp = &workerData.back(); - // Enqueue task to thread pool, or main thread - if (unsigned rem = i % threads) { - threadPoolp->workerp(rem - 1)->addTask(parallelWorkerTask, itemp, false); - } else { - mainThreadWorkerData.push_back(itemp); - } - } - // Execute main thead jobs - for (ParallelWorkerData* const itemp : mainThreadWorkerData) { - parallelWorkerTask(itemp, false); - } - // Commit all trace buffers in order - for (ParallelWorkerData& item : workerData) { - // Wait until ready - item.wait(); - // Commit the buffer - commitTraceBuffer(item.m_bufp); - } - continue; + // If tracing in parallel, dispatch to the 
thread pool + VlThreadPool* threadPoolp = static_cast(m_contextp->threadPoolp()); + // List of work items for thread (std::list, as ParallelWorkerData is not movable) + std::list workerData; + // We use the whole pool + the main thread + const unsigned threads = threadPoolp->numThreads() + 1; + // Main thread executes all jobs with index % threads == 0 + std::vector mainThreadWorkerData; + // Enqueue all the jobs + for (unsigned i = 0; i < cbVec.size(); ++i) { + const CallbackRecord& cbr = cbVec[i]; + // Always get the trace buffer on the main thread + Buffer* const bufp = getTraceBuffer(); + // Create new work item + workerData.emplace_back(cbr.m_dumpCb, cbr.m_userp, bufp); + // Grab the new work item + ParallelWorkerData* const itemp = &workerData.back(); + // Enqueue task to thread pool, or main thread + if (unsigned rem = i % threads) { + threadPoolp->workerp(rem - 1)->addTask(parallelWorkerTask, itemp); + } else { + mainThreadWorkerData.push_back(itemp); } + } + // Execute main thread jobs + for (ParallelWorkerData* const itemp : mainThreadWorkerData) { + parallelWorkerTask(itemp, false); + } + // Commit all trace buffers in order + for (ParallelWorkerData& item : workerData) { + // Wait until ready + item.wait(); + // Commit the buffer + commitTraceBuffer(item.m_bufp); + } + + // Done + return; #endif - // Fall back on sequential execution - for (const CallbackRecord& cbr : cbMap.at(threadPoolp)) { - Buffer* const traceBufferp = getTraceBuffer(); - cbr.m_dumpCb(cbr.m_userp, traceBufferp); - commitTraceBuffer(traceBufferp); - } + // Fall back on sequential execution + for (const CallbackRecord& cbr : cbVec) { + Buffer* const traceBufferp = getTraceBuffer(); + cbr.m_dumpCb(cbr.m_userp, traceBufferp); + commitTraceBuffer(traceBufferp); } } @@ -579,9 +576,9 @@ void VerilatedTrace::dump(uint64_t timeui) VL_MT_SAFE_EXCLUD // Run the callbacks if (VL_UNLIKELY(m_fullDump)) { m_fullDump = false; // No more need for next dump to be full - runParallelCallbacks(m_fullCbs);
+ runCallbacks(m_fullCbs); } else { - runParallelCallbacks(m_chgCbs); + runCallbacks(m_chgCbs); } for (uint32_t i = 0; i < m_cleanupCbs.size(); ++i) { @@ -607,18 +604,20 @@ void VerilatedTrace::dump(uint64_t timeui) VL_MT_SAFE_EXCLUD // Non-hot path internal interface to Verilator generated code template <> -void VerilatedTrace::addThreadPool(VlThreadPool* threadPoolp) +void VerilatedTrace::addContext(VerilatedContext* contextp) VL_MT_SAFE_EXCLUDES(m_mutex) { const VerilatedLockGuard lock{m_mutex}; - for (VlThreadPool* const poolp : m_threadPoolps) { - if (poolp == threadPoolp) return; + if (m_contextp && contextp != m_contextp) { + VL_FATAL_MT( + __FILE__, __LINE__, "", + "A trace file instance can only handle models from the same simulation context"); } - m_threadPoolps.push_back(threadPoolp); + m_contextp = contextp; } template <> void VerilatedTrace::addCallbackRecord(std::vector& cbVec, - CallbackRecord& cbRec) + CallbackRecord&& cbRec) VL_MT_SAFE_EXCLUDES(m_mutex) { const VerilatedLockGuard lock{m_mutex}; if (VL_UNCOVERABLE(timeLastDump() != 0)) { // LCOV_EXCL_START @@ -630,28 +629,28 @@ void VerilatedTrace::addCallbackRecord(std::vector -void VerilatedTrace::addInitCb(initCb_t cb, void* userp) VL_MT_SAFE { - CallbackRecord cbr{cb, userp}; - addCallbackRecord(m_initCbs, cbr); +void VerilatedTrace::addInitCb(initCb_t cb, void* userp, + VerilatedContext* contextp) VL_MT_SAFE { + addContext(contextp); + addCallbackRecord(m_initCbs, CallbackRecord{cb, userp}); } template <> void VerilatedTrace::addFullCb(dumpCb_t cb, void* userp, - VlThreadPool* threadPoolp) VL_MT_SAFE { - CallbackRecord cbr{cb, userp}; - addThreadPool(threadPoolp); - addCallbackRecord(m_fullCbs[threadPoolp], cbr); + VerilatedContext* contextp) VL_MT_SAFE { + addContext(contextp); + addCallbackRecord(m_fullCbs, CallbackRecord{cb, userp}); } template <> void VerilatedTrace::addChgCb(dumpCb_t cb, void* userp, - VlThreadPool* threadPoolp) VL_MT_SAFE { - CallbackRecord cbr{cb, userp}; - 
addThreadPool(threadPoolp); - addCallbackRecord(m_chgCbs[threadPoolp], cbr); + VerilatedContext* contextp) VL_MT_SAFE { + addContext(contextp); + addCallbackRecord(m_chgCbs, CallbackRecord{cb, userp}); } template <> -void VerilatedTrace::addCleanupCb(cleanupCb_t cb, void* userp) VL_MT_SAFE { - CallbackRecord cbr{cb, userp}; - addCallbackRecord(m_cleanupCbs, cbr); +void VerilatedTrace::addCleanupCb(cleanupCb_t cb, void* userp, + VerilatedContext* contextp) VL_MT_SAFE { + addContext(contextp); + addCallbackRecord(m_cleanupCbs, CallbackRecord{cb, userp}); } template <> void VerilatedTrace::pushNamePrefix(const std::string& prefix) { diff --git a/src/V3EmitCHeaders.cpp b/src/V3EmitCHeaders.cpp index ef53dfa16..77835212b 100644 --- a/src/V3EmitCHeaders.cpp +++ b/src/V3EmitCHeaders.cpp @@ -251,15 +251,15 @@ class EmitCHeader final : public EmitCConstInit { emitTextSection(modp, VNType::atScHdr); // Open class body {{{ + puts("\nclass "); + puts(prefixNameProtect(modp)); if (const AstClass* const classp = VN_CAST(modp, Class)) { - puts("class "); - puts(prefixNameProtect(modp)); if (classp->extendsp()) { puts(" : public "); puts(prefixNameProtect(classp->extendsp()->classp())); } } else { - puts("VL_MODULE(" + prefixNameProtect(modp) + ")"); + puts(" final : public VerilatedModule"); } puts(" {\n"); ofp()->resetPrivate(); diff --git a/src/V3EmitCMake.cpp b/src/V3EmitCMake.cpp index 7df71dfeb..710829eaf 100644 --- a/src/V3EmitCMake.cpp +++ b/src/V3EmitCMake.cpp @@ -173,7 +173,7 @@ class CMakeEmitter final { + ".cpp"); } } - if (v3Global.opt.mtasks()) { + if (v3Global.opt.threads()) { global.emplace_back("${VERILATOR_ROOT}/include/verilated_threads.cpp"); } if (v3Global.opt.usesProfiler()) { diff --git a/src/V3EmitCModel.cpp b/src/V3EmitCModel.cpp index e04c79f7e..203582609 100644 --- a/src/V3EmitCModel.cpp +++ b/src/V3EmitCModel.cpp @@ -89,11 +89,12 @@ class EmitCModel final : public EmitCFunc { puts("\n"); puts("// This class is the main interface to the Verilated 
model\n"); + puts("class " + topClassName() + " VL_NOT_FINAL : "); if (optSystemC()) { - puts("SC_MODULE(" + topClassName() + ") {\n"); - } else { - puts("class " + topClassName() + " VL_NOT_FINAL {\n"); + // SC_MODULE, but with multiple-inheritance of VerilatedModel + puts("public ::sc_core::sc_module, "); } + puts("public VerilatedModel {\n"); ofp()->resetPrivate(); ofp()->putsPrivate(true); // private: @@ -221,6 +222,11 @@ class EmitCModel final : public EmitCFunc { + topClassName() + "& rhs);\n"); } + puts("\n// Abstract methods from VerilatedModel\n"); + puts("const char* hierName() override;\n"); + puts("const char* modelName() override;\n"); + puts("unsigned threads() override;\n"); + puts("} VL_ATTR_ALIGNED(VL_CACHE_LINE_BYTES);\n"); ofp()->putsEndGuard(); @@ -235,7 +241,8 @@ class EmitCModel final : public EmitCFunc { puts(topClassName() + "::" + topClassName()); if (optSystemC()) { puts("(sc_module_name /* unused */)\n"); - puts(" : vlSymsp{new " + symClassName() + "(nullptr, name(), this)}\n"); + puts(" : vlSymsp{new " + symClassName() + + "(Verilated::threadContextp(), name(), this)}\n"); } else { puts(+"(VerilatedContext* _vcontextp__, const char* _vcname__)\n"); puts(" : vlSymsp{new " + symClassName() + "(_vcontextp__, _vcname__, this)}\n"); @@ -263,6 +270,8 @@ class EmitCModel final : public EmitCFunc { puts(" , rootp{&(vlSymsp->TOP)}\n"); puts("{\n"); + puts("// Register model with the context\n"); + puts("vlSymsp->_vm_contextp__->addModel(this);\n"); if (optSystemC()) { // Create sensitivity list for when to evaluate the model. 
@@ -301,7 +310,7 @@ class EmitCModel final : public EmitCFunc { if (!optSystemC()) { puts("\n"); puts(topClassName() + "::" + topClassName() + "(const char* _vcname__)\n"); - puts(" : " + topClassName() + "(nullptr, _vcname__)\n{\n}\n"); + puts(" : " + topClassName() + "(Verilated::threadContextp(), _vcname__)\n{\n}\n"); } } @@ -428,7 +437,7 @@ class EmitCModel final : public EmitCFunc { } if (v3Global.opt.profExec()) { - puts("vlSymsp->__Vm_executionProfiler.configure(*(vlSymsp->_vm_contextp__));\n"); + puts("vlSymsp->__Vm_executionProfilerp->configure();\n"); puts("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).evalBegin();\n"); } @@ -477,6 +486,13 @@ class EmitCModel final : public EmitCFunc { puts("\nVL_ATTR_COLD void " + topClassName() + "::final() {\n"); puts(/**/ topModNameProtected + "__" + protect("_final") + "(&(vlSymsp->TOP));\n"); puts("}\n"); + + putSectionDelimiter("Implementations of abstract methods from VerilatedModel\n"); + puts("const char* " + topClassName() + "::hierName() { return vlSymsp->name(); }\n"); + puts("const char* " + topClassName() + "::modelName() { return \"" + topClassName() + + "\"; }\n"); + puts("unsigned " + topClassName() + "::threads() { return " + + cvtToStr(std::max(1, v3Global.opt.threads())) + "; }\n"); } void emitTraceMethods(AstNodeModule* modp) { @@ -529,7 +545,8 @@ class EmitCModel final : public EmitCFunc { puts(/**/ "}"); } puts(/**/ "if (false && levels && options) {} // Prevent unused\n"); - puts(/**/ "tfp->spTrace()->addInitCb(&" + protect("trace_init") + ", &(vlSymsp->TOP));\n"); + puts(/**/ "tfp->spTrace()->addInitCb(&" + protect("trace_init") + + ", &(vlSymsp->TOP), contextp());\n"); puts(/**/ topModNameProtected + "__" + protect("trace_register") + "(&(vlSymsp->TOP), tfp->spTrace());\n"); diff --git a/src/V3EmitCSyms.cpp b/src/V3EmitCSyms.cpp index c66f346b0..e325aa79e 100644 --- a/src/V3EmitCSyms.cpp +++ b/src/V3EmitCSyms.cpp @@ -445,17 +445,17 @@ void EmitCSyms::emitSymHdr() { } puts("bool __Vm_didInit = false;\n"); - 
if (v3Global.opt.profExec()) { - puts("\n// EXECUTION PROFILING\n"); - puts("VlExecutionProfiler __Vm_executionProfiler;\n"); - } - if (v3Global.opt.mtasks()) { puts("\n// MULTI-THREADING\n"); puts("VlThreadPool* const __Vm_threadPoolp;\n"); puts("bool __Vm_even_cycle = false;\n"); } + if (v3Global.opt.profExec()) { + puts("\n// EXECUTION PROFILING\n"); + puts("VlExecutionProfiler* const __Vm_executionProfilerp;\n"); + } + puts("\n// MODULE INSTANCE STATE\n"); for (const auto& i : m_scopes) { const AstScope* const scopep = i.first; @@ -673,7 +673,6 @@ void EmitCSyms::emitSymImp() { puts("_vm_pgoProfiler.write(\"" + topClassName() + "\", _vm_contextp__->profVltFilename());\n"); } - if (v3Global.opt.mtasks()) puts("delete __Vm_threadPoolp;\n"); puts("}\n\n"); // Constructor @@ -705,12 +704,13 @@ void EmitCSyms::emitSymImp() { // Note we create N-1 threads in the thread pool. The thread // that calls eval() becomes the final Nth thread for the // duration of the eval call. - puts(" , __Vm_threadPoolp{new VlThreadPool{_vm_contextp__, " - + cvtToStr(v3Global.opt.threads() - 1) + ", " - + (v3Global.opt.profExec() - ? 
"&__Vm_executionProfiler, &VlExecutionProfiler::startWorkerSetup" - : "nullptr, nullptr") - + "}}\n"); + puts(" , __Vm_threadPoolp{static_cast(contextp->threadPoolp())}\n"); + } + + if (v3Global.opt.profExec()) { + puts(" , " + "__Vm_executionProfilerp{static_cast(contextp->" + "enableExecutionProfiler(&VlExecutionProfiler::construct))}\n"); } puts(" // Setup module instances\n"); diff --git a/src/V3EmitMk.cpp b/src/V3EmitMk.cpp index 3fb3907be..800cf589c 100644 --- a/src/V3EmitMk.cpp +++ b/src/V3EmitMk.cpp @@ -116,7 +116,7 @@ public: putMakeClassEntry(of, v3Global.opt.traceSourceLang() + ".cpp"); } } - if (v3Global.opt.mtasks()) putMakeClassEntry(of, "verilated_threads.cpp"); + if (v3Global.opt.threads()) putMakeClassEntry(of, "verilated_threads.cpp"); if (v3Global.opt.usesProfiler()) { putMakeClassEntry(of, "verilated_profiler.cpp"); } diff --git a/src/V3Trace.cpp b/src/V3Trace.cpp index 9fa1b099a..7113c5e0e 100644 --- a/src/V3Trace.cpp +++ b/src/V3Trace.cpp @@ -512,8 +512,10 @@ private: m_regFuncp->addStmtsp(new AstText(flp, "tracep->addChgCb(", true)); } m_regFuncp->addStmtsp(new AstAddrOfCFunc(flp, funcp)); - const string threadPool{m_parallelism > 1 ? 
"vlSymsp->__Vm_threadPoolp" : "nullptr"}; - m_regFuncp->addStmtsp(new AstText(flp, ", vlSelf, " + threadPool + ");\n", true)); + m_regFuncp->addStmtsp(new AstText(flp, ", vlSelf", true)); + m_regFuncp->addStmtsp( + new AstText(flp, ", vlSelf->vlSymsp->__Vm_modelp->contextp()", true)); + m_regFuncp->addStmtsp(new AstText(flp, ");\n", true)); } else { // Sub functions funcp->argTypes(v3Global.opt.traceClassBase() + "::Buffer* bufp"); @@ -700,7 +702,8 @@ private: // Register it m_regFuncp->addStmtsp(new AstText(fl, "tracep->addCleanupCb(", true)); m_regFuncp->addStmtsp(new AstAddrOfCFunc(fl, cleanupFuncp)); - m_regFuncp->addStmtsp(new AstText(fl, ", vlSelf);\n", true)); + m_regFuncp->addStmtsp( + new AstText(fl, ", vlSelf, vlSelf->vlSymsp->__Vm_modelp->contextp());\n", true)); // Clear global activity flag cleanupFuncp->addStmtsp( diff --git a/test_regress/driver.pl b/test_regress/driver.pl index cbd9ba9ea..ae0ed4f36 100755 --- a/test_regress/driver.pl +++ b/test_regress/driver.pl @@ -579,6 +579,7 @@ sub new { sc_time_resolution => "SC_PS", # Keep - PS is SystemC default sim_time => 1100, threads => -1, # --threads (negative means auto based on scenario) + context_threads => 0, # Number of threads to allocate in the context benchmark => $opt_benchmark, verbose => $opt_verbose, run_env => '', @@ -974,7 +975,11 @@ sub compile { $self->oprint("Compile\n") if $self->{verbose}; die "%Error: 'threads =>' argument must be <= 1 for vlt scenario" if $param{vlt} && $param{threads} > 1; - $param{threads} = ::calc_threads($Vltmt_threads) if ($param{threads} < 0 && $param{vltmt}); + # Compute automatic parameter values + $param{threads} = ::calc_threads($Vltmt_threads) if $param{threads} < 0 && $param{vltmt}; + $param{context_threads} = $param{threads} >= 1 ? 
$param{threads} : 1 if !$param{context_threads}; + $self->{threads} = $param{threads}; + $self->{context_threads} = $param{context_threads}; compile_vlt_cmd(%param); @@ -1795,6 +1800,7 @@ sub _make_main { } print $fh " const std::unique_ptr contextp{new VerilatedContext};\n"; + print $fh " contextp->threads($self->{context_threads});\n"; print $fh " contextp->commandArgs(argc, argv);\n"; print $fh " contextp->debug(" . ($self->{verilated_debug} ? 1 : 0) . ");\n"; print $fh " srand48(5);\n"; # Ensure determinism diff --git a/test_regress/t/t_embed1.pl b/test_regress/t/t_embed1.pl index 08e4c042c..2c5b8a918 100755 --- a/test_regress/t/t_embed1.pl +++ b/test_regress/t/t_embed1.pl @@ -22,7 +22,8 @@ mkdir $child_dir; (VM_PREFIX => "$Self->{VM_PREFIX}_child", top_filename => "$Self->{name}_child.v", verilator_flags => ["-cc", "-Mdir", "${child_dir}", "--debug-check"], - threads => $Self->{vltmt} ? $Self->get_default_vltmt_threads() : 0 + # Can't use multi threading (like hier blocks), but needs to be thread safe + threads => $Self->{vltmt} ? 1 : 0, ); run(logfile => "${child_dir}/vlt_compile.log", diff --git a/test_regress/t/t_gantt_two.cpp b/test_regress/t/t_gantt_two.cpp new file mode 100644 index 000000000..da253fab7 --- /dev/null +++ b/test_regress/t/t_gantt_two.cpp @@ -0,0 +1,43 @@ +// +// DESCRIPTION: Verilator: Verilog Multiple Model Test Module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2022 by Geza Lore. 
+// SPDX-License-Identifier: CC0-1.0 +// + +#include +#include "verilated.h" +#include "Vt_gantt_two.h" + +int main(int argc, char** argv, char** env) { + srand48(5); + + const std::unique_ptr contextp{new VerilatedContext}; +#ifdef VL_THREADED + contextp->threads(2); +#endif + contextp->commandArgs(argc, argv); + contextp->debug(0); + + std::unique_ptr topap{new Vt_gantt_two{contextp.get(), "topa"}}; + std::unique_ptr topbp{new Vt_gantt_two{contextp.get(), "topb"}}; + + topap->clk = false; + topap->eval(); + topbp->clk = false; + topbp->eval(); + + contextp->timeInc(10); + while ((contextp->time() < 1100) && !contextp->gotFinish()) { + topap->clk = !topap->clk; + topap->eval(); + topbp->clk = !topbp->clk; + topbp->eval(); + contextp->timeInc(5); + } + if (!contextp->gotFinish()) { + vl_fatal(__FILE__, __LINE__, "main", "%Error: Timeout; never got a $finish"); + } + return 0; +} diff --git a/test_regress/t/t_gantt_two.pl b/test_regress/t/t_gantt_two.pl new file mode 100755 index 000000000..768f55440 --- /dev/null +++ b/test_regress/t/t_gantt_two.pl @@ -0,0 +1,61 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +# Test for bin/verilator_gantt, + +scenarios(vlt_all => 1); + +# It doesn't really matter what test +# we use, so long as it runs several cycles, +# enough for the profiling to happen: +top_filename("t/t_gen_alw.v"); + +compile( + make_top_shell => 0, + make_main => 0, + v_flags2 => ["--prof-exec --exe $Self->{t_dir}/$Self->{name}.cpp"], + # Checks below care about thread count, so use 2 (minimum reasonable) + threads => $Self->{vltmt} ? 
2 : 0, + make_flags => 'CPPFLAGS_ADD=-DVL_NO_LEGACY', + ); + +execute( + all_run_flags => ["+verilator+prof+exec+start+4", + " +verilator+prof+exec+window+4", + " +verilator+prof+exec+file+$Self->{obj_dir}/profile_exec.dat", + " +verilator+prof+vlt+file+$Self->{obj_dir}/profile.vlt", + ], + check_finished => 1, + ); + +# For now, verilator_gantt still reads from STDIN +# (probably it should take a file, gantt.dat like verilator_profcfunc) +# The profiling data still goes direct to the runtime's STDOUT +# (maybe that should go to a separate file - gantt.dat?) +run(cmd => ["$ENV{VERILATOR_ROOT}/bin/verilator_gantt", + "$Self->{obj_dir}/profile_exec.dat", + "--vcd $Self->{obj_dir}/profile_exec.vcd", + "| tee $Self->{obj_dir}/gantt.log"], + ); + +if ($Self->{vltmt}) { + file_grep("$Self->{obj_dir}/gantt.log", qr/Total threads += 2/i); + file_grep("$Self->{obj_dir}/gantt.log", qr/Total mtasks += 7/i); +} else { + file_grep("$Self->{obj_dir}/gantt.log", qr/Total threads += 1/i); + file_grep("$Self->{obj_dir}/gantt.log", qr/Total mtasks += 0/i); +} +file_grep("$Self->{obj_dir}/gantt.log", qr/Total evals += 4/i); + +# Diff to itself, just to check parsing +vcd_identical("$Self->{obj_dir}/profile_exec.vcd", "$Self->{obj_dir}/profile_exec.vcd"); + +ok(1); +1; diff --git a/test_regress/t/t_hier_block_cmake/main.cpp b/test_regress/t/t_hier_block_cmake/main.cpp index e49101162..58bc8d5ad 100644 --- a/test_regress/t/t_hier_block_cmake/main.cpp +++ b/test_regress/t/t_hier_block_cmake/main.cpp @@ -14,8 +14,11 @@ int main(int argc, char *argv[]) { const std::unique_ptr contextp{new VerilatedContext}; - std::unique_ptr top{new Vt_hier_block{contextp.get(), "top"}}; +#if VL_THREADED + contextp->threads(6); +#endif contextp->commandArgs(argc, argv); + std::unique_ptr top{new Vt_hier_block{contextp.get(), "top"}}; for (int i = 0; i < 100 && !contextp->gotFinish(); ++i) { top->eval(); top->clk ^= 1; diff --git a/test_regress/t/t_lib_prot_shared.pl b/test_regress/t/t_lib_prot_shared.pl 
index 1a3f8af5f..cc0c2f977 100755 --- a/test_regress/t/t_lib_prot_shared.pl +++ b/test_regress/t/t_lib_prot_shared.pl @@ -59,7 +59,8 @@ while (1) { "-LDFLAGS", "'-Wl,-rpath,$abs_secret_dir -L$abs_secret_dir -l$secret_prefix'"], xsim_flags2 => ["$secret_dir/secret.sv"], - threads => $Self->{vltmt} ? 1 : 0 + threads => $Self->{vltmt} ? 1 : 0, + context_threads => $Self->{vltmt} ? 6 : 1 ); execute( diff --git a/test_regress/t/t_threads_crazy.pl b/test_regress/t/t_threads_crazy.pl index 6bb21acb0..c72858f2c 100755 --- a/test_regress/t/t_threads_crazy.pl +++ b/test_regress/t/t_threads_crazy.pl @@ -10,20 +10,16 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di scenarios(vltmt => 1); -if ($Self->cfg_with_m32) { - skip("Does not work with -m32 (resource unavailable)"); -} - compile( verilator_flags2 => ['--cc'], - threads => 1024 + threads => 4, + context_threads => 2 ); execute( - check_finished => 1, + fails => 1 ); -file_grep($Self->{run_log_filename}, qr/System has .* CPUs but.*--threads 1024/); - +file_grep($Self->{run_log_filename}, qr/%Error: .*\/verilated\.cpp:\d+: VerilatedContext has 2 threads but model 'Vt_threads_crazy' \(instantiated as 'top'\) was Verilated with --threads 4\./); ok(1); 1; diff --git a/test_regress/t/t_threads_crazy_context.pl b/test_regress/t/t_threads_crazy_context.pl new file mode 100755 index 000000000..8e28bb87a --- /dev/null +++ b/test_regress/t/t_threads_crazy_context.pl @@ -0,0 +1,36 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003-2009 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. 
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(vlt_all => 1); + +if ($Self->cfg_with_m32) { + skip("Does not work with -m32 (resource unavailable)"); +} + +top_filename("t/t_threads_crazy.v"); + +compile( + verilator_flags2 => ['--cc'], + threads => $Self->{vltmt} ? 2 : 0, + context_threads => 1024 + ); + +execute( + check_finished => 1, + ); + +if ($Self->{vltmt}) { + file_grep($Self->{run_log_filename}, qr/System has \d+ hardware threads but simulation thread count set to 1024\. This will likely cause significant slowdown\./); +} else { + file_grep($Self->{run_log_filename}, qr/Verilator run-time library built without VL_THREADS\. Ignoring call to 'VerilatedContext::threads' with argument 1024\./); +} + +ok(1); +1; diff --git a/test_regress/t/t_wrapper_context.cpp b/test_regress/t/t_wrapper_context.cpp index 87332eb8c..31a9334f0 100644 --- a/test_regress/t/t_wrapper_context.cpp +++ b/test_regress/t/t_wrapper_context.cpp @@ -92,6 +92,8 @@ int main(int argc, char** argv, char** env) { std::unique_ptr context1p{new VerilatedContext}; // configuration + context0p->threads(1); + context1p->threads(1); context0p->fatalOnError(false); context1p->fatalOnError(false); context0p->traceEverOn(true);