mirror of
https://github.com/verilator/verilator.git
synced 2025-01-21 22:04:03 +00:00
230 lines
7.6 KiB
C++
230 lines
7.6 KiB
C++
|
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
||
|
//=============================================================================
|
||
|
//
|
||
|
// THIS MODULE IS PUBLICLY LICENSED
|
||
|
//
|
||
|
// Copyright 2012-2018 by Wilson Snyder. This program is free software;
|
||
|
// you can redistribute it and/or modify it under the terms of either the GNU
|
||
|
// Lesser General Public License Version 3 or the Perl Artistic License Version 2.0.
|
||
|
//
|
||
|
// This is distributed in the hope that it will be useful, but WITHOUT ANY
|
||
|
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
|
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||
|
// for more details.
|
||
|
//
|
||
|
//=============================================================================
|
||
|
///
|
||
|
/// \file
|
||
|
/// \brief Thread pool for verilated modules
|
||
|
///
|
||
|
//=============================================================================
|
||
|
|
||
|
#include "verilatedos.h"
|
||
|
#include "verilated_threads.h"
|
||
|
#include <cstdio>
|
||
|
|
||
|
// Out-of-class definition of the static atomic counter VlNotification::s_yields.
// NOTE(review): presumably the value reported by VlNotification::yields() -- confirm in the header.
std::atomic<vluint64_t> VlNotification::s_yields;
|
||
|
|
||
|
// Definition of the VL_THREAD_LOCAL profiling trace pointer.  It is allocated by
// setupProfilingClientThread(), freed and reset to NULL by
// tearDownProfilingClientThread(), and starts out NULL.
VL_THREAD_LOCAL VlThreadPool::ProfileTrace* VlThreadPool::t_profilep = NULL;
|
||
|
|
||
|
//=============================================================================
|
||
|
// VlMTaskVertex
|
||
|
|
||
|
/// Construct an mtask vertex that records \p upstreamDepCount as its number
/// of upstream dependencies; the count of finished dependencies starts at 0.
VlMTaskVertex::VlMTaskVertex(vluint32_t upstreamDepCount)
    : m_upstreamDepsDone(0), m_upstreamDepCount(upstreamDepCount) {
    // Sanity check (debug builds only): the done-counter must be lock free.
    assert(atomic_is_lock_free(&m_upstreamDepsDone));
}
|
||
|
|
||
|
//=============================================================================
|
||
|
// VlWorkerThread
|
||
|
|
||
|
/// Create a worker belonging to \p poolp and launch its thread, which
/// enters startWorker(this).  \p profiling is stored in m_profiling.
VlWorkerThread::VlWorkerThread(VlThreadPool* poolp, bool profiling)
    : m_poolp(poolp),
      m_profiling(profiling),
      m_exiting(false),
      // The thread must be started only after the fields it may read are
      // set up, so it is listed last.
      // NOTE(review): members are actually initialized in declaration
      // order, not initializer-list order -- confirm m_cthread is declared
      // after the fields above in the class definition.
      m_cthread(startWorker, this) {}
|
||
|
|
||
|
/// Ask the worker thread to exit, wake it if it is asleep, and block until
/// the thread has been joined.
VlWorkerThread::~VlWorkerThread() {
    // Publish the exit request before touching the sleep state, so a worker
    // that is about to sleep will observe it.
    m_exiting.store(true, std::memory_order_release);
    {
        // Only the sleeping()/wakeUp() pair needs the lock; release it
        // before joining.
        VerilatedLockGuard guard(m_mutex);
        if (sleeping()) wakeUp();
    }
    // The worker leaves workerLoop() once it sees m_exiting; wait for it.
    m_cthread.join();
}
|
||
|
|
||
|
void VlWorkerThread::workerLoop() {
|
||
|
if (VL_UNLIKELY(m_profiling)) {
|
||
|
m_poolp->setupProfilingClientThread();
|
||
|
}
|
||
|
|
||
|
VlNotification alarm;
|
||
|
ExecRec work;
|
||
|
work.m_fnp = NULL;
|
||
|
|
||
|
while (1) {
|
||
|
bool sleep = false;
|
||
|
if (VL_UNLIKELY(!work.m_fnp)) {
|
||
|
// Look for work
|
||
|
VerilatedLockGuard lk(m_mutex);
|
||
|
if (VL_LIKELY(!m_ready.empty())) {
|
||
|
dequeWork(&work);
|
||
|
} else {
|
||
|
// No work available, prepare to sleep. Pass alarm/work
|
||
|
// into m_sleepAlarm so wakeUp will tall this function.
|
||
|
//
|
||
|
// Must modify m_sleepAlarm in the same critical section as
|
||
|
// the check for ready work, otherwise we could race with
|
||
|
// another thread enqueueing work and never be awoken.
|
||
|
m_sleepAlarm.first = &alarm;
|
||
|
m_sleepAlarm.second = &work;
|
||
|
sleep = true;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Do this here, not above, to avoid a race with the destructor.
|
||
|
if (VL_UNLIKELY(m_exiting.load(std::memory_order_acquire)))
|
||
|
break;
|
||
|
|
||
|
if (VL_UNLIKELY(sleep)) {
|
||
|
alarm.waitForNotification(); // ZZZzzzzz
|
||
|
alarm.reset();
|
||
|
}
|
||
|
if (VL_LIKELY(work.m_fnp)) {
|
||
|
work.m_fnp(work.m_evenCycle, work.m_sym);
|
||
|
work.m_fnp = NULL;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (VL_UNLIKELY(m_profiling)) {
|
||
|
m_poolp->tearDownProfilingClientThread();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/// Thread entry point passed to m_cthread by the constructor; simply runs
/// the worker loop of \p workerp and returns when that loop returns.
void VlWorkerThread::startWorker(VlWorkerThread* workerp) {
    workerp->workerLoop();
}
|
||
|
|
||
|
//=============================================================================
|
||
|
// VlThreadPool
|
||
|
|
||
|
/// Create a pool of \p nThreads worker threads.
///
/// \param nThreads   Number of worker threads to create.  --threads N passes
///                   nThreads=N-1, as the "main" thread counts as 1.
/// \param profiling  When true, every worker and the constructing thread get
///                   a profiling trace buffer.
///
/// Prints a warning when the host reports fewer CPUs than the total number
/// of threads (workers plus the main thread).
VlThreadPool::VlThreadPool(int nThreads, bool profiling)
    : m_profiling(profiling) {
    const unsigned cpus = std::thread::hardware_concurrency();
    // hardware_concurrency() returns 0 when the CPU count cannot be
    // determined; in that case there is nothing meaningful to warn about.
    // Compare in unsigned only once nThreads is known to be non-negative, so
    // a negative value cannot wrap around to a huge thread count.
    if (cpus != 0 && nThreads >= 0
        && cpus < static_cast<unsigned>(nThreads) + 1U) {
        VL_PRINTF_MT("%%Warning: System has %u CPUs but model Verilated with"
                     " --threads %d; may run slow.\n", cpus, nThreads+1);
    }
    // Create'em
    for (int i = 0; i < nThreads; ++i) {
        m_workers.push_back(new VlWorkerThread(this, profiling));
    }
    // Set up a profile buffer for the current thread too -- on the
    // assumption that it's the same thread that calls eval and may be
    // donated to run mtasks during the eval.
    if (VL_UNLIKELY(m_profiling)) {
        setupProfilingClientThread();
    }
}
|
||
|
|
||
|
/// Destroy every worker (each worker destructor waits for its thread to
/// exit), then release the calling thread's profiling trace if profiling
/// was enabled.
VlThreadPool::~VlThreadPool() {
    for (VlWorkerThread* workerp : m_workers) {
        // Blocks until this worker's thread has exited.
        delete workerp;
    }
    if (VL_UNLIKELY(m_profiling)) tearDownProfilingClientThread();
}
|
||
|
|
||
|
void VlThreadPool::tearDownProfilingClientThread() {
|
||
|
assert(t_profilep);
|
||
|
delete t_profilep;
|
||
|
t_profilep = NULL;
|
||
|
}
|
||
|
|
||
|
void VlThreadPool::setupProfilingClientThread() {
|
||
|
assert(!t_profilep);
|
||
|
t_profilep = new ProfileTrace;
|
||
|
// Reserve some space in the thread-local profiling buffer;
|
||
|
// try not to malloc while collecting profiling.
|
||
|
t_profilep->reserve(4096);
|
||
|
{
|
||
|
VerilatedLockGuard lk(m_mutex);
|
||
|
m_allProfiles.insert(t_profilep);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void VlThreadPool::profileAppendAll(const VlProfileRec& rec) {
|
||
|
VerilatedLockGuard lk(m_mutex);
|
||
|
for (ProfileSet::iterator it = m_allProfiles.begin();
|
||
|
it != m_allProfiles.end(); ++it) {
|
||
|
// Every thread's profile trace gets a copy of rec.
|
||
|
(*it)->emplace_back(rec);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void VlThreadPool::profileDump(const char* filenamep, vluint64_t ticksElapsed) {
|
||
|
VerilatedLockGuard lk(m_mutex);
|
||
|
VL_DEBUG_IF(VL_DBG_MSGF("+prof+threads writing to '%s'\n", filenamep););
|
||
|
|
||
|
FILE* fp = fopen(filenamep, "w");
|
||
|
if (VL_UNLIKELY(!fp)) {
|
||
|
VL_FATAL_MT(filenamep, 0, "", "+prof+threads+file file not writable");
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
// TODO Perhaps merge with verilated_coverage output format, so can
|
||
|
// have a common merging and reporting tool, etc.
|
||
|
fprintf(fp, "VLPROFTHREAD 1.0 # Verilator thread profile dump version 1.0\n");
|
||
|
fprintf(fp, "VLPROF arg --threads %" VL_PRI64 "u\n",
|
||
|
vluint64_t(m_workers.size()+1));
|
||
|
fprintf(fp, "VLPROF arg +verilator+prof+threads+start+%" VL_PRI64 "u\n",
|
||
|
Verilated::profThreadsStart());
|
||
|
fprintf(fp, "VLPROF arg +verilator+prof+threads+window+%u\n",
|
||
|
Verilated::profThreadsWindow());
|
||
|
fprintf(fp, "VLPROF stat yields %" VL_PRI64 "u\n",
|
||
|
VlNotification::yields());
|
||
|
|
||
|
vluint32_t thread_id = 0;
|
||
|
for (ProfileSet::iterator pit = m_allProfiles.begin();
|
||
|
pit != m_allProfiles.end(); ++pit) {
|
||
|
++thread_id;
|
||
|
|
||
|
bool printing = false; // False while in warmup phase
|
||
|
for (ProfileTrace::iterator eit = (*pit)->begin();
|
||
|
eit != (*pit)->end(); ++eit) {
|
||
|
switch (eit->m_type) {
|
||
|
case VlProfileRec::TYPE_BARRIER:
|
||
|
printing = true;
|
||
|
break;
|
||
|
case VlProfileRec::TYPE_MTASK_RUN:
|
||
|
if (!printing) break;
|
||
|
fprintf(fp, "VLPROF mtask %d"
|
||
|
" start %" VL_PRI64"u end %" VL_PRI64"u elapsed %" VL_PRI64 "u"
|
||
|
" predict_time %u cpu %u on thread %u\n",
|
||
|
eit->m_mtaskId,
|
||
|
eit->m_startTime,
|
||
|
eit->m_endTime,
|
||
|
(eit->m_endTime - eit->m_startTime),
|
||
|
eit->m_predictTime,
|
||
|
eit->m_cpu,
|
||
|
thread_id);
|
||
|
break;
|
||
|
default: assert(false);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
fprintf(fp, "VLPROF stat ticks %" VL_PRI64 "u\n",
|
||
|
ticksElapsed);
|
||
|
|
||
|
fclose(fp);
|
||
|
}
|