mirror of
https://github.com/verilator/verilator.git
synced 2025-01-01 04:07:34 +00:00
Make parallel tracing switchable at run-time
This commit is contained in:
parent
efb5caad22
commit
a4ed3c2086
@ -182,7 +182,7 @@ private:
|
||||
|
||||
const bool m_offload; // Whether to use the offload thread (ignored if !VL_THREADED)
|
||||
|
||||
#ifdef VL_TRACE_PARALLEL
|
||||
#ifdef VL_THREADED
|
||||
struct ParallelWorkerData {
|
||||
const dumpCb_t m_cb; // The callback
|
||||
void* const m_userp; // The use pointer to pass to the callback
|
||||
@ -317,6 +317,14 @@ protected:
|
||||
static constexpr bool offload() { return false; }
|
||||
#endif
|
||||
|
||||
// Reports whether parallel tracing is enabled for this trace object.
// At present the answer is fixed by the build: true exactly when
// VL_TRACE_PARALLEL is defined, false otherwise.
inline bool parallel() const {
#ifdef VL_TRACE_PARALLEL
    constexpr bool enabled = true;
#else
    constexpr bool enabled = false;
#endif
    return enabled;
}
|
||||
|
||||
//=========================================================================
|
||||
// Virtual functions to be provided by the format specific implementation
|
||||
|
||||
|
@ -26,7 +26,7 @@
|
||||
|
||||
#include "verilated_intrinsics.h"
|
||||
#include "verilated_trace.h"
|
||||
#ifdef VL_TRACE_PARALLEL
|
||||
#ifdef VL_THREADED
|
||||
# include "verilated_threads.h"
|
||||
# include <list>
|
||||
#endif
|
||||
@ -462,7 +462,7 @@ void VerilatedTrace<VL_SUB_T, VL_BUF_T>::dumpvars(int level, const std::string&
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef VL_TRACE_PARALLEL
|
||||
#ifdef VL_THREADED
|
||||
template <> //
|
||||
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::parallelWorkerTask(void* datap, bool) {
|
||||
ParallelWorkerData* const wdp = reinterpret_cast<ParallelWorkerData*>(datap);
|
||||
@ -490,45 +490,47 @@ template <> VL_ATTR_NOINLINE void VerilatedTrace<VL_SUB_T, VL_BUF_T>::ParallelWo
|
||||
|
||||
template <>
|
||||
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::runCallbacks(const std::vector<CallbackRecord>& cbVec) {
|
||||
#ifdef VL_TRACE_PARALLEL
|
||||
// If tracing in parallel, dispatch to the thread pool
|
||||
VlThreadPool* threadPoolp = static_cast<VlThreadPool*>(m_contextp->threadPoolp());
|
||||
// List of work items for thread (std::list, as ParallelWorkerData is not movable)
|
||||
std::list<ParallelWorkerData> workerData;
|
||||
// We use the whole pool + the main thread
|
||||
const unsigned threads = threadPoolp->numThreads() + 1;
|
||||
// Main thread executes all jobs with index % threads == 0
|
||||
std::vector<ParallelWorkerData*> mainThreadWorkerData;
|
||||
// Enqueue all the jobs
|
||||
for (unsigned i = 0; i < cbVec.size(); ++i) {
|
||||
const CallbackRecord& cbr = cbVec[i];
|
||||
// Always get the trace buffer on the main thread
|
||||
Buffer* const bufp = getTraceBuffer();
|
||||
// Create new work item
|
||||
workerData.emplace_back(cbr.m_dumpCb, cbr.m_userp, bufp);
|
||||
// Grab the new work item
|
||||
ParallelWorkerData* const itemp = &workerData.back();
|
||||
// Enqueue task to thread pool, or main thread
|
||||
if (unsigned rem = i % threads) {
|
||||
threadPoolp->workerp(rem - 1)->addTask(parallelWorkerTask, itemp);
|
||||
} else {
|
||||
mainThreadWorkerData.push_back(itemp);
|
||||
#ifdef VL_THREADED
|
||||
if (parallel()) {
|
||||
// If tracing in parallel, dispatch to the thread pool
|
||||
VlThreadPool* threadPoolp = static_cast<VlThreadPool*>(m_contextp->threadPoolp());
|
||||
// List of work items for thread (std::list, as ParallelWorkerData is not movable)
|
||||
std::list<ParallelWorkerData> workerData;
|
||||
// We use the whole pool + the main thread
|
||||
const unsigned threads = threadPoolp->numThreads() + 1;
|
||||
// Main thread executes all jobs with index % threads == 0
|
||||
std::vector<ParallelWorkerData*> mainThreadWorkerData;
|
||||
// Enqueue all the jobs
|
||||
for (unsigned i = 0; i < cbVec.size(); ++i) {
|
||||
const CallbackRecord& cbr = cbVec[i];
|
||||
// Always get the trace buffer on the main thread
|
||||
Buffer* const bufp = getTraceBuffer();
|
||||
// Create new work item
|
||||
workerData.emplace_back(cbr.m_dumpCb, cbr.m_userp, bufp);
|
||||
// Grab the new work item
|
||||
ParallelWorkerData* const itemp = &workerData.back();
|
||||
// Enqueue task to thread pool, or main thread
|
||||
if (unsigned rem = i % threads) {
|
||||
threadPoolp->workerp(rem - 1)->addTask(parallelWorkerTask, itemp);
|
||||
} else {
|
||||
mainThreadWorkerData.push_back(itemp);
|
||||
}
|
||||
}
|
||||
// Execute main thread jobs
|
||||
for (ParallelWorkerData* const itemp : mainThreadWorkerData) {
|
||||
parallelWorkerTask(itemp, false);
|
||||
}
|
||||
// Commit all trace buffers in order
|
||||
for (ParallelWorkerData& item : workerData) {
|
||||
// Wait until ready
|
||||
item.wait();
|
||||
// Commit the buffer
|
||||
commitTraceBuffer(item.m_bufp);
|
||||
}
|
||||
}
|
||||
// Execute main thread jobs
|
||||
for (ParallelWorkerData* const itemp : mainThreadWorkerData) {
|
||||
parallelWorkerTask(itemp, false);
|
||||
}
|
||||
// Commit all trace buffers in order
|
||||
for (ParallelWorkerData& item : workerData) {
|
||||
// Wait until ready
|
||||
item.wait();
|
||||
// Commit the buffer
|
||||
commitTraceBuffer(item.m_bufp);
|
||||
}
|
||||
|
||||
// Done
|
||||
return;
|
||||
// Done
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
// Fall back on sequential execution
|
||||
for (const CallbackRecord& cbr : cbVec) {
|
||||
|
@ -230,9 +230,11 @@ VerilatedVcd::~VerilatedVcd() {
|
||||
if (m_wrBufp) VL_DO_CLEAR(delete[] m_wrBufp, m_wrBufp = nullptr);
|
||||
deleteNameMap();
|
||||
if (m_filep && m_fileNewed) VL_DO_CLEAR(delete m_filep, m_filep = nullptr);
|
||||
#ifdef VL_TRACE_PARALLEL
|
||||
assert(m_numBuffers == m_freeBuffers.size());
|
||||
for (auto& pair : m_freeBuffers) VL_DO_CLEAR(delete[] pair.first, pair.first = nullptr);
|
||||
#ifdef VL_THREADED
|
||||
if (parallel()) {
|
||||
assert(m_numBuffers == m_freeBuffers.size());
|
||||
for (auto& pair : m_freeBuffers) VL_DO_CLEAR(delete[] pair.first, pair.first = nullptr);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -572,49 +574,55 @@ void VerilatedVcd::declDouble(uint32_t code, const char* name, bool array, int a
|
||||
|
||||
// Create a new trace Buffer bound to this VCD file and return it.
//
// In threaded builds, when parallel() is true, the Buffer is additionally
// given its own char storage taken from m_freeBuffers (a new array is
// allocated first if the free list is empty), and its write pointer, size
// and grow threshold are initialized. Otherwise the Buffer is returned as
// constructed.
//
// The returned Buffer is heap allocated; commitTraceBuffer() deletes it.
VerilatedVcd::Buffer* VerilatedVcd::getTraceBuffer() {
    VerilatedVcd::Buffer* const bufp = new Buffer{*this};
#ifdef VL_THREADED
    if (parallel()) {
        // Note: This is called from VerilatedVcd::dump, which already holds the lock
        // If no buffer available, allocate a new one
        if (m_freeBuffers.empty()) {
            constexpr size_t pageSize = 4096;
            // 4 * m_maxSignalBytes, so we can reserve 2 * m_maxSignalBytes at the end for safety
            const size_t startingSize = roundUpToMultipleOf<pageSize>(4 * m_maxSignalBytes);
            m_freeBuffers.emplace_back(new char[startingSize], startingSize);
            ++m_numBuffers;
        }
        // Grab a buffer (exactly one per call; stored as a (pointer, size) pair)
        const auto pair = m_freeBuffers.back();
        m_freeBuffers.pop_back();
        // Initialize
        bufp->m_writep = bufp->m_bufp = pair.first;
        bufp->m_size = pair.second;
        bufp->adjustGrowp();
    }
#endif
    // Return the buffer
    return bufp;
}
|
||||
|
||||
// Commit the contents of a trace Buffer obtained from getTraceBuffer() and
// destroy the Buffer object.
//
// When parallel() is true (threaded builds only): the bytes between
// bufp->m_bufp and bufp->m_writep are appended to the output buffer at
// m_writep, the output is flushed if bufferCheck() decides so, and the
// Buffer's char storage is put back on m_freeBuffers for reuse.
// When parallel() is false: only the owner's write pointer is updated from
// the Buffer's write pointer.
//
// bufp is deleted in all cases; the caller must not use it afterwards.
void VerilatedVcd::commitTraceBuffer(VerilatedVcd::Buffer* bufp) {
    if (parallel()) {
        // Use '#ifdef' like every other VL_THREADED guard: '#if VL_THREADED' does not
        // compile when the macro is defined without a value.
#ifdef VL_THREADED
        // Note: This is called from VerilatedVcd::dump, which already holds the lock
        // Resize output buffer. Note, we use the full size of the trace buffer, as
        // this is a lot more stable than the actual occupancy of the trace buffer.
        // This helps us to avoid re-allocations due to small size changes.
        bufferResize(bufp->m_size);
        // Compute occupancy of buffer
        const size_t usedSize = bufp->m_writep - bufp->m_bufp;
        // Copy to output buffer
        std::memcpy(m_writep, bufp->m_bufp, usedSize);
        // Adjust write pointer
        m_writep += usedSize;
        // Flush if necessary
        bufferCheck();
        // Put buffer back on free list
        m_freeBuffers.emplace_back(bufp->m_bufp, bufp->m_size);
#else
        // Parallel tracing requires a threaded build
        VL_FATAL_MT(__FILE__, __LINE__, "", "Unreachable");
#endif
    } else {
        // Needs adjusting for emitTimeChange
        m_writep = bufp->m_writep;
    }
    delete bufp;
}
|
||||
|
||||
@ -656,35 +664,39 @@ void VerilatedVcdBuffer::finishLine(uint32_t code, char* writep) {
|
||||
// suffix, which was stored in the last byte of the suffix buffer entry.
|
||||
m_writep = writep + suffixp[VL_TRACE_SUFFIX_ENTRY_SIZE - 1];
|
||||
|
||||
#ifdef VL_TRACE_PARALLEL
|
||||
// Double the size of the buffer if necessary
|
||||
if (VL_UNLIKELY(m_writep >= m_growp)) {
|
||||
// Compute occupied size of current buffer
|
||||
const size_t usedSize = m_writep - m_bufp;
|
||||
// We are always doubling the size
|
||||
m_size *= 2;
|
||||
// Allocate the new buffer
|
||||
char* const newBufp = new char[m_size];
|
||||
// Copy from current buffer to new buffer
|
||||
std::memcpy(newBufp, m_bufp, usedSize);
|
||||
// Delete current buffer
|
||||
delete[] m_bufp;
|
||||
// Make new buffer the current buffer
|
||||
m_bufp = newBufp;
|
||||
// Adjust write pointer
|
||||
m_writep = m_bufp + usedSize;
|
||||
// Adjust resize limit
|
||||
adjustGrowp();
|
||||
}
|
||||
if (m_owner.parallel()) {
|
||||
#ifdef VL_THREADED
|
||||
// Double the size of the buffer if necessary
|
||||
if (VL_UNLIKELY(m_writep >= m_growp)) {
|
||||
// Compute occupied size of current buffer
|
||||
const size_t usedSize = m_writep - m_bufp;
|
||||
// We are always doubling the size
|
||||
m_size *= 2;
|
||||
// Allocate the new buffer
|
||||
char* const newBufp = new char[m_size];
|
||||
// Copy from current buffer to new buffer
|
||||
std::memcpy(newBufp, m_bufp, usedSize);
|
||||
// Delete current buffer
|
||||
delete[] m_bufp;
|
||||
// Make new buffer the current buffer
|
||||
m_bufp = newBufp;
|
||||
// Adjust write pointer
|
||||
m_writep = m_bufp + usedSize;
|
||||
// Adjust resize limit
|
||||
adjustGrowp();
|
||||
}
|
||||
#else
|
||||
// Flush the write buffer if there's not enough space left for new information
|
||||
// We only call this once per vector, so we need enough slop for a very wide "b###" line
|
||||
if (VL_UNLIKELY(m_writep > m_wrFlushp)) {
|
||||
m_owner.m_writep = m_writep;
|
||||
m_owner.bufferFlush();
|
||||
m_writep = m_owner.m_writep;
|
||||
}
|
||||
VL_FATAL_MT(__FILE__, __LINE__, "", "Unreachable");
|
||||
#endif
|
||||
} else {
|
||||
// Flush the write buffer if there's not enough space left for new information
|
||||
// We only call this once per vector, so we need enough slop for a very wide "b###" line
|
||||
if (VL_UNLIKELY(m_writep > m_wrFlushp)) {
|
||||
m_owner.m_writep = m_writep;
|
||||
m_owner.bufferFlush();
|
||||
m_writep = m_owner.m_writep;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
|
@ -65,7 +65,7 @@ private:
|
||||
using NameMap = std::map<const std::string, const std::string>;
|
||||
NameMap* m_namemapp = nullptr; // List of names for the header
|
||||
|
||||
#ifdef VL_TRACE_PARALLEL
|
||||
#ifdef VL_THREADED
|
||||
// Vector of free trace buffers as (pointer, size) pairs.
|
||||
std::vector<std::pair<char*, size_t>> m_freeBuffers;
|
||||
size_t m_numBuffers = 0; // Number of trace buffers allocated
|
||||
@ -168,30 +168,30 @@ class VerilatedVcdBuffer VL_NOT_FINAL {
|
||||
|
||||
VerilatedVcd& m_owner; // Trace file owning this buffer. Required by subclasses.
|
||||
|
||||
#ifdef VL_TRACE_PARALLEL
|
||||
char* m_writep = nullptr; // Write pointer into m_bufp
|
||||
char* m_bufp = nullptr; // The beginning of the trace buffer
|
||||
size_t m_size = 0; // The size of the buffer at m_bufp
|
||||
char* m_growp = nullptr; // Resize limit pointer
|
||||
#else
|
||||
char* m_writep = m_owner.m_writep; // Write pointer into output buffer
|
||||
char* const m_wrFlushp = m_owner.m_wrFlushp; // Output buffer flush trigger location
|
||||
#endif
|
||||
// Write pointer into output buffer (in parallel mode, this is set up in 'getTraceBuffer')
|
||||
char* m_writep = m_owner.parallel() ? nullptr : m_owner.m_writep;
|
||||
// Output buffer flush trigger location (only used when not parallel)
|
||||
char* const m_wrFlushp = m_owner.parallel() ? nullptr : m_owner.m_wrFlushp;
|
||||
|
||||
// VCD line end string codes + metadata
|
||||
const char* const m_suffixes = m_owner.m_suffixes.data();
|
||||
// The maximum number of bytes a single signal can emit
|
||||
const size_t m_maxSignalBytes = m_owner.m_maxSignalBytes;
|
||||
|
||||
void finishLine(uint32_t code, char* writep);
|
||||
#ifdef VL_THREADED
|
||||
// Additional data for parallel tracing only
|
||||
char* m_bufp = nullptr; // The beginning of the trace buffer
|
||||
size_t m_size = 0; // The size of the buffer at m_bufp
|
||||
char* m_growp = nullptr; // Resize limit pointer
|
||||
|
||||
#ifdef VL_TRACE_PARALLEL
|
||||
// Recompute the resize threshold pointer for the current buffer:
// m_growp is placed 2 * m_maxSignalBytes before the end of the storage
// that starts at m_bufp and is m_size bytes long.
void adjustGrowp() {
    char* const bufEndp = m_bufp + m_size;
    m_growp = bufEndp - (2 * m_maxSignalBytes);
    // The threshold must leave at least m_maxSignalBytes of room after the buffer start
    assert(m_bufp + m_maxSignalBytes <= m_growp);
}
|
||||
#endif
|
||||
|
||||
void finishLine(uint32_t code, char* writep);
|
||||
|
||||
// CONSTRUCTOR
|
||||
// Bind this buffer to the trace file that owns it. Only the owner reference
// is set here; the remaining members take their in-class initializers.
explicit VerilatedVcdBuffer(VerilatedVcd& owner)
    : m_owner(owner) {}
|
||||
|
Loading…
Reference in New Issue
Block a user