mirror of
https://github.com/verilator/verilator.git
synced 2025-01-01 04:07:34 +00:00
b1b5b5dfe2
The --prof-threads option has been split into two independent options: 1. --prof-exec, for collecting verilator_gantt and other execution related profiling data, and 2. --prof-pgo, for collecting data needed for PGO The implementation of execution profiling is extricated from VlThreadPool and is now a separate class VlExecutionProfiler. This means --prof-exec can now be used for single-threaded models (though it does not measure a lot of things just yet). For consistency VerilatedProfiler is renamed VlPgoProfiler. Both VlExecutionProfiler and VlPgoProfiler are in verilated_profiler.{h/cpp}, but can be used completely independently. Also re-worked the execution profile format so it now only emits events without holding onto any temporaries. This is in preparation for some future optimizations that would be hindered by the introduction of function locals via AstText. Also removed the Barrier event. Clearing the profile buffers is not notably more expensive as the profiling records are trivially destructible.
122 lines
3.9 KiB
C++
122 lines
3.9 KiB
C++
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
|
//=============================================================================
|
|
//
|
|
// Code available from: https://verilator.org
|
|
//
|
|
// Copyright 2012-2022 by Wilson Snyder. This program is free software; you can
|
|
// redistribute it and/or modify it under the terms of either the GNU
|
|
// Lesser General Public License Version 3 or the Perl Artistic License
|
|
// Version 2.0.
|
|
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
|
//
|
|
//=============================================================================
|
|
///
|
|
/// \file
|
|
/// \brief Verilated thread pool implementation code
|
|
///
|
|
/// This file must be compiled and linked against all Verilated objects
|
|
/// that use --threads.
|
|
///
|
|
/// Use "verilator --threads" to add this to the Makefile for the linker.
|
|
///
|
|
//=============================================================================
|
|
|
|
#include "verilatedos.h"
|
|
#include "verilated_threads.h"
|
|
|
|
#ifdef VL_PROFILER
|
|
#include "verilated_profiler.h"
|
|
#endif
|
|
|
|
#include <cstdio>
|
|
#include <memory>
|
|
#include <string>
|
|
|
|
//=============================================================================
|
|
// Globals
|
|
|
|
// Internal note: Globals may multi-construct, see verilated.cpp top.
|
|
|
|
std::atomic<vluint64_t> VlMTaskVertex::s_yields;
|
|
|
|
//=============================================================================
|
|
// VlMTaskVertex
|
|
|
|
VlMTaskVertex::VlMTaskVertex(vluint32_t upstreamDepCount)
|
|
: m_upstreamDepsDone{0}
|
|
, m_upstreamDepCount{upstreamDepCount} {
|
|
assert(atomic_is_lock_free(&m_upstreamDepsDone));
|
|
}
|
|
|
|
//=============================================================================
|
|
// VlWorkerThread
|
|
|
|
VlWorkerThread::VlWorkerThread(uint32_t threadId, VerilatedContext* contextp,
|
|
VlExecutionProfiler* profilerp)
|
|
: m_ready_size{0}
|
|
, m_exiting{false}
|
|
, m_cthread{startWorker, this, threadId, profilerp}
|
|
, m_contextp{contextp} {}
|
|
|
|
VlWorkerThread::~VlWorkerThread() {
|
|
m_exiting.store(true, std::memory_order_release);
|
|
wakeUp();
|
|
// The thread should exit; join it.
|
|
m_cthread.join();
|
|
}
|
|
|
|
void VlWorkerThread::workerLoop() {
|
|
ExecRec work;
|
|
work.m_fnp = nullptr;
|
|
|
|
while (true) {
|
|
if (VL_LIKELY(!work.m_fnp)) dequeWork(&work);
|
|
|
|
// Do this here, not above, to avoid a race with the destructor.
|
|
if (VL_UNLIKELY(m_exiting.load(std::memory_order_acquire))) break;
|
|
|
|
if (VL_LIKELY(work.m_fnp)) {
|
|
work.m_fnp(work.m_selfp, work.m_evenCycle);
|
|
work.m_fnp = nullptr;
|
|
}
|
|
}
|
|
}
|
|
|
|
void VlWorkerThread::startWorker(VlWorkerThread* workerp, uint32_t threadId,
|
|
VlExecutionProfiler* profilerp) {
|
|
Verilated::threadContextp(workerp->m_contextp);
|
|
#ifdef VL_PROFILER
|
|
// Note: setupThread is not defined without VL_PROFILER, hence the #ifdef. Still, we might
|
|
// not be profiling execution (e.g.: PGO only), so profilerp might still be nullptr.
|
|
if (profilerp) profilerp->setupThread(threadId);
|
|
#endif
|
|
workerp->workerLoop();
|
|
}
|
|
|
|
//=============================================================================
|
|
// VlThreadPool
|
|
|
|
VlThreadPool::VlThreadPool(VerilatedContext* contextp, int nThreads,
|
|
VlExecutionProfiler* profiler) {
|
|
// --threads N passes nThreads=N-1, as the "main" threads counts as 1
|
|
++nThreads;
|
|
const unsigned cpus = std::thread::hardware_concurrency();
|
|
if (cpus < nThreads) {
|
|
static int warnedOnce = 0;
|
|
if (!warnedOnce++) {
|
|
VL_PRINTF_MT("%%Warning: System has %u CPUs but model Verilated with"
|
|
" --threads %d; may run slow.\n",
|
|
cpus, nThreads);
|
|
}
|
|
}
|
|
// Create worker threads
|
|
for (uint32_t threadId = 1; threadId < nThreads; ++threadId) {
|
|
m_workers.push_back(new VlWorkerThread{threadId, contextp, profiler});
|
|
}
|
|
}
|
|
|
|
VlThreadPool::~VlThreadPool() {
|
|
// Each ~WorkerThread will wait for its thread to exit.
|
|
for (auto& i : m_workers) delete i;
|
|
}
|