verilator/include/verilated_threads.cpp

// -*- mode: C++; c-file-style: "cc-mode" -*-
//=============================================================================
//
// Code available from: https://verilator.org
//
// Copyright 2012-2022 by Wilson Snyder. This program is free software; you can
// redistribute it and/or modify it under the terms of either the GNU
// Lesser General Public License Version 3 or the Perl Artistic License
// Version 2.0.
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
//
//=============================================================================
///
/// \file
/// \brief Verilated thread pool implementation code
///
/// This file must be compiled and linked against all Verilated objects
/// that use --threads.
///
/// Use "verilator --threads" to add this to the Makefile for the linker.
///
//=============================================================================

#include "verilatedos.h"
#include "verilated_threads.h"

#ifdef VL_PROFILER
#include "verilated_profiler.h"
#endif

#include <cstdio>
#include <memory>
#include <string>

//=============================================================================
// Globals

// Internal note: Globals may multi-construct, see verilated.cpp top.

// Out-of-class definition providing storage for the static member
// VlMTaskVertex::s_yields (declared in the class, outside this file).
// No initializer is given; as an object with static storage duration it is
// zero-initialized before any dynamic initialization runs.
std::atomic<vluint64_t> VlMTaskVertex::s_yields;

//=============================================================================
// VlMTaskVertex

// Construct a vertex that records how many upstream dependencies it has.
// The completed-dependency counter starts at zero.
//   upstreamDepCount: stored verbatim in m_upstreamDepCount
VlMTaskVertex::VlMTaskVertex(vluint32_t upstreamDepCount)
    : m_upstreamDepsDone{0}
    , m_upstreamDepCount{upstreamDepCount} {
    // Debug-build sanity check only (compiled out under NDEBUG): the
    // completed-dependency counter is expected to be a lock-free atomic.
    assert(m_upstreamDepsDone.is_lock_free());
}

//=============================================================================
// VlWorkerThread

// Construct a worker and launch its underlying thread.
//   threadId:  forwarded to startWorker() on the new thread
//   contextp:  stored in m_contextp; startWorker() reads it on the new thread
//   profilerp: forwarded to startWorker(); may be nullptr
VlWorkerThread::VlWorkerThread(uint32_t threadId, VerilatedContext* contextp,
                               VlExecutionProfiler* profilerp)
    : m_ready_size{0}
    , m_exiting{false}
    , m_contextp{contextp} {
    // Launch the thread from the constructor body rather than from the member
    // initializer list. Members are initialized in class declaration order
    // (not visible in this file), and startWorker() reads m_contextp as soon
    // as the thread starts. Starting the thread here guarantees every member
    // is fully initialized before the worker can observe this object,
    // whatever the declaration order is.
    m_cthread = std::thread{startWorker, this, threadId, profilerp};
}

// Shut the worker down: flag exit, wake the thread, then wait for it.
// The three steps must stay in this order; the exit flag has to be set before
// the worker is woken, and the join must come last.
VlWorkerThread::~VlWorkerThread() {
    // Release store; workerLoop() reads m_exiting with an acquire load.
    m_exiting.store(true, std::memory_order_release);
    // wakeUp() is defined outside this file; presumably it unblocks a worker
    // waiting inside dequeWork() so it can observe m_exiting - confirm.
    wakeUp();
    // The thread should exit; join it.
    m_cthread.join();
}

void VlWorkerThread::workerLoop() {
    ExecRec work;
    work.m_fnp = nullptr;

    while (true) {
        if (VL_LIKELY(!work.m_fnp)) dequeWork(&work);

        // Do this here, not above, to avoid a race with the destructor.
        if (VL_UNLIKELY(m_exiting.load(std::memory_order_acquire))) break;

        if (VL_LIKELY(work.m_fnp)) {
            work.m_fnp(work.m_selfp, work.m_evenCycle);
            work.m_fnp = nullptr;
        }
    }
}

// Entry point executed on the newly created worker thread.
//   workerp:   the worker object whose loop this thread runs
//   threadId:  passed to the profiler's setupThread() (VL_PROFILER builds only)
//   profilerp: execution profiler, or nullptr when execution is not profiled
void VlWorkerThread::startWorker(VlWorkerThread* workerp, uint32_t threadId,
                                 VlExecutionProfiler* profilerp) {
    // Register the worker's context for this thread before running any work
    VerilatedContext* const threadCtxp = workerp->m_contextp;
    Verilated::threadContextp(threadCtxp);
#ifdef VL_PROFILER
    // Note: setupThread is not defined without VL_PROFILER, hence the #ifdef. Still, we might
    // not be profiling execution (e.g.: PGO only), so profilerp might still be nullptr.
    if (profilerp != nullptr) {
        profilerp->setupThread(threadId);
    }
#endif
    // Does not return until the worker is told to exit
    workerp->workerLoop();
}

//=============================================================================
// VlThreadPool

// Construct the pool and create its worker threads.
//   contextp: context handed to every worker
//   nThreads: number of worker threads to create, i.e. the model's
//             "--threads N" value minus one (the main thread counts as one)
//   profiler: execution profiler handed to every worker; may be nullptr
// Warns once per process if the machine reports fewer CPUs than the total
// thread count.
VlThreadPool::VlThreadPool(VerilatedContext* contextp, int nThreads,
                           VlExecutionProfiler* profiler) {
    // --threads N passes nThreads=N-1, as the "main" threads counts as 1
    ++nThreads;
    // Convert to an unsigned count once, so the comparisons below are not
    // mixed signed/unsigned. A non-positive count clamps to zero instead of
    // wrapping to a huge unsigned value.
    const uint32_t totalThreads = nThreads > 0 ? static_cast<uint32_t>(nThreads) : 0;
    const unsigned cpus = std::thread::hardware_concurrency();
    // hardware_concurrency() returns 0 when the CPU count cannot be
    // determined; in that case there is nothing meaningful to warn about.
    if (cpus != 0 && cpus < totalThreads) {
        static bool s_warned = false;
        if (!s_warned) {
            s_warned = true;
            VL_PRINTF_MT("%%Warning: System has %u CPUs but model Verilated with"
                         " --threads %d; may run slow.\n",
                         cpus, nThreads);
        }
    }
    // Create worker threads; id 0 is not used for a worker here, ids start at 1
    for (uint32_t threadId = 1; threadId < totalThreads; ++threadId) {
        m_workers.push_back(new VlWorkerThread{threadId, contextp, profiler});
    }
}

// Destroy every worker owned by the pool, in container order.
VlThreadPool::~VlThreadPool() {
    // Deleting a worker runs ~VlWorkerThread, which joins that worker's
    // thread, so each delete returns only after its thread has exited.
    for (auto* const workerp : m_workers) {
        delete workerp;
    }
}
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00			`// -- mode: C++; c-file-style: "cc-mode" --`
			`//=============================================================================`
			`//`
Commentary: Cleanup all include/* header comments. 2021-03-20 21:46:00 +00:00			`// Code available from: https://verilator.org`
			`//`
Copyright year update. 2022-01-01 13:26:40 +00:00			`// Copyright 2012-2022 by Wilson Snyder. This program is free software; you can`
Add SPDX license identifiers. No functional change. 2020-03-21 15:24:24 +00:00			`// redistribute it and/or modify it under the terms of either the GNU`
			`// Lesser General Public License Version 3 or the Perl Artistic License`
			`// Version 2.0.`
			`// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0`
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00			`//`
			`//=============================================================================`
			`///`
			`/// \file`
Commentary: Cleanup all include/* header comments. 2021-03-20 21:46:00 +00:00			`/// \brief Verilated thread pool implementation code`
			`///`
			`/// This file must be compiled and linked against all Verilated objects`
			`/// that use --threads.`
			`///`
			`/// Use "verilator --threads" to add this to the Makefile for the linker.`
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00			`///`
			`//=============================================================================`

			`#include "verilatedos.h"`
			`#include "verilated_threads.h"`
Internals: Cleanup and standardize include order. No functional change intended. 2018-10-14 17:43:24 +00:00
Improve run-time profiling The --prof-threads option has been split into two independent options: 1. --prof-exec, for collecting verilator_gantt and other execution related profiling data, and 2. --prof-pgo, for collecting data needed for PGO The implementation of execution profiling is extricated from VlThreadPool and is now a separate class VlExecutionProfiler. This means --prof-exec can now be used for single-threaded models (though it does not measure a lot of things just yet). For consistency VerilatedProfiler is renamed VlPgoProfiler. Both VlExecutionProfiler and VlPgoProfiler are in verilated_profiler.{h/cpp}, but can be used completely independently. Also re-worked the execution profile format so it now only emits events without holding onto any temporaries. This is in preparation for some future optimizations that would be hindered by the introduction of function locals via AstText. Also removed the Barrier event. Clearing the profile buffers is not notably more expensive as the profiling records are trivially destructible. 2022-03-25 19:46:50 +00:00			`#ifdef VL_PROFILER`
			`#include "verilated_profiler.h"`
			`#endif`

MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00			`#include <cstdio>`
Internals: Add cpplint control file and related cleanups 2022-01-09 21:49:38 +00:00			`#include <memory>`
			`#include <string>`
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00
Convert VPI to singleton, part of (#2660). 2021-03-05 00:23:40 +00:00			`//=============================================================================`
			`// Globals`

			`// Internal note: Globals may multi-construct, see verilated.cpp top.`

Fix multithreaded yield behavior when no work. Signed-off-by: Wilson Snyder <wsnyder@wsnyder.org> 2019-10-07 23:27:31 +00:00			`std::atomic<vluint64_t> VlMTaskVertex::s_yields;`
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00
			`//=============================================================================`
			`// VlMTaskVertex`

			`VlMTaskVertex::VlMTaskVertex(vluint32_t upstreamDepCount)`
C++11: Use member declaration initalizations. No functional change intended. 2020-08-16 13:55:36 +00:00			`: m_upstreamDepsDone{0}`
			`, m_upstreamDepCount{upstreamDepCount} {`
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00			`assert(atomic_is_lock_free(&m_upstreamDepsDone));`
			`}`

			`//=============================================================================`
			`// VlWorkerThread`

Improve run-time profiling The --prof-threads option has been split into two independent options: 1. --prof-exec, for collecting verilator_gantt and other execution related profiling data, and 2. --prof-pgo, for collecting data needed for PGO The implementation of execution profiling is extricated from VlThreadPool and is now a separate class VlExecutionProfiler. This means --prof-exec can now be used for single-threaded models (though it does not measure a lot of things just yet). For consistency VerilatedProfiler is renamed VlPgoProfiler. Both VlExecutionProfiler and VlPgoProfiler are in verilated_profiler.{h/cpp}, but can be used completely independently. Also re-worked the execution profile format so it now only emits events without holding onto any temporaries. This is in preparation for some future optimizations that would be hindered by the introduction of function locals via AstText. Also removed the Barrier event. Clearing the profile buffers is not notably more expensive as the profiling records are trivially destructible. 2022-03-25 19:46:50 +00:00			`VlWorkerThread::VlWorkerThread(uint32_t threadId, VerilatedContext* contextp,`
			`VlExecutionProfiler* profilerp)`
Internals: Some clang-tidy cleanups. No functional change intended. 2021-07-25 17:38:27 +00:00			`: m_ready_size{0}`
C++11: Use member declaration initalizations. No functional change intended. 2020-08-16 13:55:36 +00:00			`, m_exiting{false}`
Improve run-time profiling The --prof-threads option has been split into two independent options: 1. --prof-exec, for collecting verilator_gantt and other execution related profiling data, and 2. --prof-pgo, for collecting data needed for PGO The implementation of execution profiling is extricated from VlThreadPool and is now a separate class VlExecutionProfiler. This means --prof-exec can now be used for single-threaded models (though it does not measure a lot of things just yet). For consistency VerilatedProfiler is renamed VlPgoProfiler. Both VlExecutionProfiler and VlPgoProfiler are in verilated_profiler.{h/cpp}, but can be used completely independently. Also re-worked the execution profile format so it now only emits events without holding onto any temporaries. This is in preparation for some future optimizations that would be hindered by the introduction of function locals via AstText. Also removed the Barrier event. Clearing the profile buffers is not notably more expensive as the profiling records are trivially destructible. 2022-03-25 19:46:50 +00:00			`, m_cthread{startWorker, this, threadId, profilerp}`
Add simulation context (VerilatedContext) (#2660). (#2813) Add simulation context (VerilatedContext) to allow multiple fully independent models to be in the same process. Please see the updated examples. Add context->time() and context->timeInc() API calls, to set simulation time. These now are recommended in place of the legacy sc_time_stamp(). 2021-03-07 16:01:54 +00:00			`, m_contextp{contextp} {}`
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00
			`VlWorkerThread::~VlWorkerThread() {`
			`m_exiting.store(true, std::memory_order_release);`
Fix multithreaded yield behavior when no work. Signed-off-by: Wilson Snyder <wsnyder@wsnyder.org> 2019-10-07 23:27:31 +00:00			`wakeUp();`
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00			`// The thread should exit; join it.`
			`m_cthread.join();`
			`}`

			`void VlWorkerThread::workerLoop() {`
			`ExecRec work;`
C++11: Use nullptr. No functional change. 2020-08-15 14:12:55 +00:00			`work.m_fnp = nullptr;`
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00
Cleanup misc clang-tidy warnings. No functional change intended 2020-04-04 02:31:54 +00:00			`while (true) {`
Internals: clang-format cleanups. No functional change. 2020-04-04 17:45:24 +00:00			`if (VL_LIKELY(!work.m_fnp)) dequeWork(&work);`
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00
			`// Do this here, not above, to avoid a race with the destructor.`
clang-format many files. No functional change. Use nodist/clang_formatter to reformat files that are now clean. 2020-04-14 02:51:35 +00:00			`if (VL_UNLIKELY(m_exiting.load(std::memory_order_acquire))) break;`
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00
			`if (VL_LIKELY(work.m_fnp)) {`
Emit model implementation as loose methods. (#3006) This patch introduces the concept of 'loose' methods, which semantically are methods, but are declared as global functions, and are passed an explicit 'self' pointer. This enables these methods to be declared outside the class, only when they are needed, therefore removing the header dependency. The bulk of the emitted model implementation now uses loose methods. 2021-06-13 13:33:11 +00:00			`work.m_fnp(work.m_selfp, work.m_evenCycle);`
C++11: Use nullptr. No functional change. 2020-08-15 14:12:55 +00:00			`work.m_fnp = nullptr;`
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00			`}`
			`}`
			`}`

Improve run-time profiling The --prof-threads option has been split into two independent options: 1. --prof-exec, for collecting verilator_gantt and other execution related profiling data, and 2. --prof-pgo, for collecting data needed for PGO The implementation of execution profiling is extricated from VlThreadPool and is now a separate class VlExecutionProfiler. This means --prof-exec can now be used for single-threaded models (though it does not measure a lot of things just yet). For consistency VerilatedProfiler is renamed VlPgoProfiler. Both VlExecutionProfiler and VlPgoProfiler are in verilated_profiler.{h/cpp}, but can be used completely independently. Also re-worked the execution profile format so it now only emits events without holding onto any temporaries. This is in preparation for some future optimizations that would be hindered by the introduction of function locals via AstText. Also removed the Barrier event. Clearing the profile buffers is not notably more expensive as the profiling records are trivially destructible. 2022-03-25 19:46:50 +00:00			`void VlWorkerThread::startWorker(VlWorkerThread* workerp, uint32_t threadId,`
			`VlExecutionProfiler* profilerp) {`
Add simulation context (VerilatedContext) (#2660). (#2813) Add simulation context (VerilatedContext) to allow multiple fully independent models to be in the same process. Please see the updated examples. Add context->time() and context->timeInc() API calls, to set simulation time. These now are recommended in place of the legacy sc_time_stamp(). 2021-03-07 16:01:54 +00:00			`Verilated::threadContextp(workerp->m_contextp);`
Improve run-time profiling The --prof-threads option has been split into two independent options: 1. --prof-exec, for collecting verilator_gantt and other execution related profiling data, and 2. --prof-pgo, for collecting data needed for PGO The implementation of execution profiling is extricated from VlThreadPool and is now a separate class VlExecutionProfiler. This means --prof-exec can now be used for single-threaded models (though it does not measure a lot of things just yet). For consistency VerilatedProfiler is renamed VlPgoProfiler. Both VlExecutionProfiler and VlPgoProfiler are in verilated_profiler.{h/cpp}, but can be used completely independently. Also re-worked the execution profile format so it now only emits events without holding onto any temporaries. This is in preparation for some future optimizations that would be hindered by the introduction of function locals via AstText. Also removed the Barrier event. Clearing the profile buffers is not notably more expensive as the profiling records are trivially destructible. 2022-03-25 19:46:50 +00:00			`#ifdef VL_PROFILER`
			`// Note: setupThread is not defined without VL_PROFILER, hence the #ifdef. Still, we might`
			`// not be profiling execution (e.g.: PGO only), so profilerp might still be nullptr.`
			`if (profilerp) profilerp->setupThread(threadId);`
			`#endif`
Add simulation context (VerilatedContext) (#2660). (#2813) Add simulation context (VerilatedContext) to allow multiple fully independent models to be in the same process. Please see the updated examples. Add context->time() and context->timeInc() API calls, to set simulation time. These now are recommended in place of the legacy sc_time_stamp(). 2021-03-07 16:01:54 +00:00			`workerp->workerLoop();`
			`}`
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00
			`//=============================================================================`
			`// VlThreadPool`

Improve run-time profiling The --prof-threads option has been split into two independent options: 1. --prof-exec, for collecting verilator_gantt and other execution related profiling data, and 2. --prof-pgo, for collecting data needed for PGO The implementation of execution profiling is extricated from VlThreadPool and is now a separate class VlExecutionProfiler. This means --prof-exec can now be used for single-threaded models (though it does not measure a lot of things just yet). For consistency VerilatedProfiler is renamed VlPgoProfiler. Both VlExecutionProfiler and VlPgoProfiler are in verilated_profiler.{h/cpp}, but can be used completely independently. Also re-worked the execution profile format so it now only emits events without holding onto any temporaries. This is in preparation for some future optimizations that would be hindered by the introduction of function locals via AstText. Also removed the Barrier event. Clearing the profile buffers is not notably more expensive as the profiling records are trivially destructible. 2022-03-25 19:46:50 +00:00			`VlThreadPool::VlThreadPool(VerilatedContext* contextp, int nThreads,`
			`VlExecutionProfiler* profiler) {`
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00			`// --threads N passes nThreads=N-1, as the "main" threads counts as 1`
Improve run-time profiling The --prof-threads option has been split into two independent options: 1. --prof-exec, for collecting verilator_gantt and other execution related profiling data, and 2. --prof-pgo, for collecting data needed for PGO The implementation of execution profiling is extricated from VlThreadPool and is now a separate class VlExecutionProfiler. This means --prof-exec can now be used for single-threaded models (though it does not measure a lot of things just yet). For consistency VerilatedProfiler is renamed VlPgoProfiler. Both VlExecutionProfiler and VlPgoProfiler are in verilated_profiler.{h/cpp}, but can be used completely independently. Also re-worked the execution profile format so it now only emits events without holding onto any temporaries. This is in preparation for some future optimizations that would be hindered by the introduction of function locals via AstText. Also removed the Barrier event. Clearing the profile buffers is not notably more expensive as the profiling records are trivially destructible. 2022-03-25 19:46:50 +00:00			`++nThreads;`
Internals: Add more const. No functional change. 2021-06-19 02:19:35 +00:00			`const unsigned cpus = std::thread::hardware_concurrency();`
Improve run-time profiling The --prof-threads option has been split into two independent options: 1. --prof-exec, for collecting verilator_gantt and other execution related profiling data, and 2. --prof-pgo, for collecting data needed for PGO The implementation of execution profiling is extricated from VlThreadPool and is now a separate class VlExecutionProfiler. This means --prof-exec can now be used for single-threaded models (though it does not measure a lot of things just yet). For consistency VerilatedProfiler is renamed VlPgoProfiler. Both VlExecutionProfiler and VlPgoProfiler are in verilated_profiler.{h/cpp}, but can be used completely independently. Also re-worked the execution profile format so it now only emits events without holding onto any temporaries. This is in preparation for some future optimizations that would be hindered by the introduction of function locals via AstText. Also removed the Barrier event. Clearing the profile buffers is not notably more expensive as the profiling records are trivially destructible. 2022-03-25 19:46:50 +00:00			`if (cpus < nThreads) {`
Warn only once if too few CPUs. 2019-06-15 13:17:51 +00:00			`static int warnedOnce = 0;`
			`if (!warnedOnce++) {`
			`VL_PRINTF_MT("%%Warning: System has %u CPUs but model Verilated with"`
Internals: clang-format cleanups. No functional change. 2020-04-04 17:45:24 +00:00			`" --threads %d; may run slow.\n",`
Improve run-time profiling The --prof-threads option has been split into two independent options: 1. --prof-exec, for collecting verilator_gantt and other execution related profiling data, and 2. --prof-pgo, for collecting data needed for PGO The implementation of execution profiling is extricated from VlThreadPool and is now a separate class VlExecutionProfiler. This means --prof-exec can now be used for single-threaded models (though it does not measure a lot of things just yet). For consistency VerilatedProfiler is renamed VlPgoProfiler. Both VlExecutionProfiler and VlPgoProfiler are in verilated_profiler.{h/cpp}, but can be used completely independently. Also re-worked the execution profile format so it now only emits events without holding onto any temporaries. This is in preparation for some future optimizations that would be hindered by the introduction of function locals via AstText. Also removed the Barrier event. Clearing the profile buffers is not notably more expensive as the profiling records are trivially destructible. 2022-03-25 19:46:50 +00:00			`cpus, nThreads);`
Warn only once if too few CPUs. 2019-06-15 13:17:51 +00:00			`}`
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00			`}`
Improve run-time profiling The --prof-threads option has been split into two independent options: 1. --prof-exec, for collecting verilator_gantt and other execution related profiling data, and 2. --prof-pgo, for collecting data needed for PGO The implementation of execution profiling is extricated from VlThreadPool and is now a separate class VlExecutionProfiler. This means --prof-exec can now be used for single-threaded models (though it does not measure a lot of things just yet). For consistency VerilatedProfiler is renamed VlPgoProfiler. Both VlExecutionProfiler and VlPgoProfiler are in verilated_profiler.{h/cpp}, but can be used completely independently. Also re-worked the execution profile format so it now only emits events without holding onto any temporaries. This is in preparation for some future optimizations that would be hindered by the introduction of function locals via AstText. Also removed the Barrier event. Clearing the profile buffers is not notably more expensive as the profiling records are trivially destructible. 2022-03-25 19:46:50 +00:00			`// Create worker threads`
			`for (uint32_t threadId = 1; threadId < nThreads; ++threadId) {`
			`m_workers.push_back(new VlWorkerThread{threadId, contextp, profiler});`
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00			`}`
			`}`

			`VlThreadPool::~VlThreadPool() {`
clang-tidy cleanups. No functional change intended. 2020-11-11 02:40:14 +00:00			`// Each ~WorkerThread will wait for its thread to exit.`
			`for (auto& i : m_workers) delete i;`
MAJOR: Add multithreaded model generation. 2018-07-23 00:54:28 +00:00			`}`