forked from github/verilator
ff425369ac
Trace initialization (tracep->decl* functions) used to explicitly pass the complete hierarchical names of signals as string constants. This contains a lot of redundancy (path prefixes), does not scale well with large designs and resulted in .rodata sections (the string constants) in ELF executables being extremely large. This patch changes the API of trace initialization that allows pushing and popping name prefixes as we walk the hierarchy tree, which are prepended to declared signal names at run-time during trace initialization. This in turn allows us to emit repeat path/name components only once, effectively removing all duplicate path prefixes. On SweRV EH1 this reduces the .rodata section in a --trace build by 94%. Additionally, trace declarations are now emitted in lexical order by hierarchical signal names, and the top level trace initialization function respects --output-split-ctrace.
404 lines
16 KiB
C++
404 lines
16 KiB
C++
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
|
//=============================================================================
|
|
//
|
|
// Code available from: https://verilator.org
|
|
//
|
|
// Copyright 2001-2021 by Wilson Snyder. This program is free software; you
|
|
// can redistribute it and/or modify it under the terms of either the GNU
|
|
// Lesser General Public License Version 3 or the Perl Artistic License
|
|
// Version 2.0.
|
|
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
|
//
|
|
//=============================================================================
|
|
///
|
|
/// \file
|
|
/// \brief Verilated internal common-tracing header
|
|
///
|
|
/// This file is not part of the Verilated public-facing API.
|
|
/// It is only for internal use by Verilated tracing routines.
|
|
///
|
|
//=============================================================================
|
|
|
|
#ifndef VERILATOR_VERILATED_TRACE_H_
|
|
#define VERILATOR_VERILATED_TRACE_H_
|
|
|
|
// clang-format off
|
|
|
|
#include "verilated.h"
|
|
#include "verilated_trace_defs.h"
|
|
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#ifdef VL_TRACE_THREADED
|
|
# include <condition_variable>
|
|
# include <deque>
|
|
# include <thread>
|
|
#endif
|
|
|
|
// clang-format on
|
|
|
|
#ifdef VL_TRACE_THREADED
|
|
//=============================================================================
|
|
// Threaded tracing
|
|
|
|
// A simple synchronized first in first out queue
|
|
template <class T> class VerilatedThreadQueue final { // LCOV_EXCL_LINE // lcov bug
|
|
private:
|
|
VerilatedMutex m_mutex; // Protects m_queue
|
|
std::condition_variable_any m_cv;
|
|
std::deque<T> m_queue VL_GUARDED_BY(m_mutex);
|
|
|
|
public:
|
|
// Put an element at the back of the queue
|
|
void put(T value) VL_MT_SAFE_EXCLUDES(m_mutex) {
|
|
const VerilatedLockGuard lock{m_mutex};
|
|
m_queue.push_back(value);
|
|
m_cv.notify_one();
|
|
}
|
|
|
|
// Put an element at the front of the queue
|
|
void put_front(T value) VL_MT_SAFE_EXCLUDES(m_mutex) {
|
|
const VerilatedLockGuard lock{m_mutex};
|
|
m_queue.push_front(value);
|
|
m_cv.notify_one();
|
|
}
|
|
|
|
// Get an element from the front of the queue. Blocks if none available
|
|
T get() VL_MT_SAFE_EXCLUDES(m_mutex) {
|
|
VerilatedLockGuard lock{m_mutex};
|
|
m_cv.wait(lock, [this]() VL_REQUIRES(m_mutex) { return !m_queue.empty(); });
|
|
assert(!m_queue.empty());
|
|
T value = m_queue.front();
|
|
m_queue.pop_front();
|
|
return value;
|
|
}
|
|
|
|
// Non blocking get
|
|
bool tryGet(T& result) VL_MT_SAFE_EXCLUDES(m_mutex) {
|
|
const VerilatedLockGuard lockGuard{m_mutex};
|
|
if (m_queue.empty()) return false;
|
|
result = m_queue.front();
|
|
m_queue.pop_front();
|
|
return true;
|
|
}
|
|
};
|
|
|
|
// Commands used by thread tracing. Anonymous enum in class, as we want
|
|
// it scoped, but we also want the automatic conversion to integer types.
|
|
class VerilatedTraceCommand final {
|
|
public:
|
|
// These must all fit in 4 bit at the moment, as the tracing routines
|
|
// pack parameters in the top bits.
|
|
enum : vluint8_t {
|
|
CHG_BIT_0 = 0x0,
|
|
CHG_BIT_1 = 0x1,
|
|
CHG_CDATA = 0x2,
|
|
CHG_SDATA = 0x3,
|
|
CHG_IDATA = 0x4,
|
|
CHG_QDATA = 0x5,
|
|
CHG_WDATA = 0x6,
|
|
CHG_DOUBLE = 0x8,
|
|
// TODO: full..
|
|
TIME_CHANGE = 0xd,
|
|
END = 0xe, // End of buffer
|
|
SHUTDOWN = 0xf // Shutdown worker thread, also marks end of buffer
|
|
};
|
|
};
|
|
#endif
|
|
|
|
//=============================================================================
|
|
// VerilatedTrace
|
|
|
|
// VerilatedTrace uses F-bounded polymorphism to access duck-typed
|
|
// implementations in the format specific derived class, which must be passed
|
|
// as the type parameter T_Derived
|
|
template <class T_Derived> class VerilatedTrace VL_NOT_FINAL {
|
|
public:
|
|
//=========================================================================
|
|
// Generic tracing internals
|
|
|
|
using initCb_t = void (*)(void*, T_Derived*, vluint32_t); // Type of init callbacks
|
|
using dumpCb_t = void (*)(void*, T_Derived*); // Type of all but init callbacks
|
|
|
|
private:
|
|
struct CallbackRecord {
|
|
// Note: would make these fields const, but some old STL implementations
|
|
// (the one in Ubuntu 14.04 with GCC 4.8.4 in particular) use the
|
|
// assignment operator on inserting into collections, so they don't work
|
|
// with const fields...
|
|
union {
|
|
initCb_t m_initCb; // The callback function
|
|
dumpCb_t m_dumpCb; // The callback function
|
|
};
|
|
void* m_userp; // The user pointer to pass to the callback (the symbol table)
|
|
CallbackRecord(initCb_t cb, void* userp)
|
|
: m_initCb{cb}
|
|
, m_userp{userp} {}
|
|
CallbackRecord(dumpCb_t cb, void* userp)
|
|
: m_dumpCb{cb}
|
|
, m_userp{userp} {}
|
|
};
|
|
|
|
vluint32_t* m_sigs_oldvalp; // Old value store
|
|
vluint64_t m_timeLastDump; // Last time we did a dump
|
|
std::vector<CallbackRecord> m_initCbs; // Routines to initialize traciong
|
|
std::vector<CallbackRecord> m_fullCbs; // Routines to perform full dump
|
|
std::vector<CallbackRecord> m_chgCbs; // Routines to perform incremental dump
|
|
std::vector<CallbackRecord> m_cleanupCbs; // Routines to call at the end of dump
|
|
bool m_fullDump; // Whether a full dump is required on the next call to 'dump'
|
|
vluint32_t m_nextCode; // Next code number to assign
|
|
vluint32_t m_numSignals; // Number of distinct signals
|
|
vluint32_t m_maxBits; // Number of bits in the widest signal
|
|
std::vector<std::string> m_namePrefixStack{""}; // Path prefixes to add to signal names
|
|
char m_scopeEscape;
|
|
double m_timeRes; // Time resolution (ns/ms etc)
|
|
double m_timeUnit; // Time units (ns/ms etc)
|
|
|
|
void addCallbackRecord(std::vector<CallbackRecord>& cbVec, CallbackRecord& cbRec)
|
|
VL_MT_SAFE_EXCLUDES(m_mutex);
|
|
|
|
// Equivalent to 'this' but is of the sub-type 'T_Derived*'. Use 'self()->'
|
|
// to access duck-typed functions to avoid a virtual function call.
|
|
T_Derived* self() { return static_cast<T_Derived*>(this); }
|
|
|
|
// Flush any remaining data for this file
|
|
static void onFlush(void* selfp) VL_MT_UNSAFE_ONE;
|
|
// Close the file on termination
|
|
static void onExit(void* selfp) VL_MT_UNSAFE_ONE;
|
|
|
|
#ifdef VL_TRACE_THREADED
|
|
// Number of total trace buffers that have been allocated
|
|
vluint32_t m_numTraceBuffers;
|
|
|
|
// Size of trace buffers
|
|
size_t m_traceBufferSize;
|
|
|
|
// Buffers handed to worker for processing
|
|
VerilatedThreadQueue<vluint32_t*> m_buffersToWorker;
|
|
// Buffers returned from worker after processing
|
|
VerilatedThreadQueue<vluint32_t*> m_buffersFromWorker;
|
|
|
|
// Get a new trace buffer that can be populated. May block if none available
|
|
vluint32_t* getTraceBuffer();
|
|
|
|
// Write pointer into current buffer
|
|
vluint32_t* m_traceBufferWritep;
|
|
|
|
// End of trace buffer
|
|
vluint32_t* m_traceBufferEndp;
|
|
|
|
// The worker thread itself
|
|
std::unique_ptr<std::thread> m_workerThread;
|
|
|
|
// The function executed by the worker thread
|
|
void workerThreadMain();
|
|
|
|
// Wait until given buffer is placed in m_buffersFromWorker
|
|
void waitForBuffer(const vluint32_t* bufferp);
|
|
|
|
// Shut down and join worker, if it's running, otherwise do nothing
|
|
void shutdownWorker();
|
|
#endif
|
|
|
|
// CONSTRUCTORS
|
|
VL_UNCOPYABLE(VerilatedTrace);
|
|
|
|
protected:
|
|
//=========================================================================
|
|
// Internals available to format specific implementations
|
|
|
|
VerilatedMutex m_mutex; // Ensure dump() etc only called from single thread
|
|
|
|
vluint32_t nextCode() const { return m_nextCode; }
|
|
vluint32_t numSignals() const { return m_numSignals; }
|
|
vluint32_t maxBits() const { return m_maxBits; }
|
|
void fullDump(bool value) { m_fullDump = value; }
|
|
vluint64_t timeLastDump() { return m_timeLastDump; }
|
|
|
|
double timeRes() const { return m_timeRes; }
|
|
double timeUnit() const { return m_timeUnit; }
|
|
std::string timeResStr() const;
|
|
|
|
void traceInit() VL_MT_UNSAFE;
|
|
|
|
void declCode(vluint32_t code, vluint32_t bits, bool tri);
|
|
|
|
// Is this an escape?
|
|
bool isScopeEscape(char c) { return std::isspace(c) || c == m_scopeEscape; }
|
|
// Character that splits scopes. Note whitespace are ALWAYS escapes.
|
|
char scopeEscape() { return m_scopeEscape; }
|
|
// Prefix to assume in signal declarations
|
|
const std::string& namePrefix() const { return m_namePrefixStack.back(); }
|
|
|
|
void closeBase();
|
|
void flushBase();
|
|
|
|
//=========================================================================
|
|
// Virtual functions to be provided by the format specific implementation
|
|
|
|
// Called when the trace moves forward to a new time point
|
|
virtual void emitTimeChange(vluint64_t timeui) = 0;
|
|
|
|
// These hooks are called before a full or change based dump is produced.
|
|
// The return value indicates whether to proceed with the dump.
|
|
virtual bool preFullDump() = 0;
|
|
virtual bool preChangeDump() = 0;
|
|
|
|
public:
|
|
//=========================================================================
|
|
// External interface to client code
|
|
|
|
explicit VerilatedTrace();
|
|
~VerilatedTrace();
|
|
|
|
// Set time units (s/ms, defaults to ns)
|
|
void set_time_unit(const char* unitp) VL_MT_SAFE;
|
|
void set_time_unit(const std::string& unit) VL_MT_SAFE;
|
|
// Set time resolution (s/ms, defaults to ns)
|
|
void set_time_resolution(const char* unitp) VL_MT_SAFE;
|
|
void set_time_resolution(const std::string& unit) VL_MT_SAFE;
|
|
|
|
// Call
|
|
void dump(vluint64_t timeui) VL_MT_SAFE_EXCLUDES(m_mutex);
|
|
|
|
//=========================================================================
|
|
// Non-hot path internal interface to Verilator generated code
|
|
|
|
void addInitCb(initCb_t cb, void* userp) VL_MT_SAFE;
|
|
void addFullCb(dumpCb_t cb, void* userp) VL_MT_SAFE;
|
|
void addChgCb(dumpCb_t cb, void* userp) VL_MT_SAFE;
|
|
void addCleanupCb(dumpCb_t cb, void* userp) VL_MT_SAFE;
|
|
|
|
void scopeEscape(char flag) { m_scopeEscape = flag; }
|
|
|
|
void pushNamePrefix(const std::string&);
|
|
void popNamePrefix(unsigned count = 1);
|
|
|
|
//=========================================================================
|
|
// Hot path internal interface to Verilator generated code
|
|
|
|
// Implementation note: We rely on the following duck-typed implementations
|
|
// in the derived class T_Derived. These emit* functions record a format
|
|
// specific trace entry. Normally one would use pure virtual functions for
|
|
// these here, but we cannot afford dynamic dispatch for calling these as
|
|
// this is very hot code during tracing.
|
|
|
|
// duck-typed void emitBit(vluint32_t code, CData newval) = 0;
|
|
// duck-typed void emitCData(vluint32_t code, CData newval, int bits) = 0;
|
|
// duck-typed void emitSData(vluint32_t code, SData newval, int bits) = 0;
|
|
// duck-typed void emitIData(vluint32_t code, IData newval, int bits) = 0;
|
|
// duck-typed void emitQData(vluint32_t code, QData newval, int bits) = 0;
|
|
// duck-typed void emitWData(vluint32_t code, const WData* newvalp, int bits) = 0;
|
|
// duck-typed void emitDouble(vluint32_t code, double newval) = 0;
|
|
|
|
vluint32_t* oldp(vluint32_t code) { return m_sigs_oldvalp + code; }
|
|
|
|
// Write to previous value buffer value and emit trace entry.
|
|
void fullBit(vluint32_t* oldp, CData newval);
|
|
void fullCData(vluint32_t* oldp, CData newval, int bits);
|
|
void fullSData(vluint32_t* oldp, SData newval, int bits);
|
|
void fullIData(vluint32_t* oldp, IData newval, int bits);
|
|
void fullQData(vluint32_t* oldp, QData newval, int bits);
|
|
void fullWData(vluint32_t* oldp, const WData* newvalp, int bits);
|
|
void fullDouble(vluint32_t* oldp, double newval);
|
|
|
|
#ifdef VL_TRACE_THREADED
|
|
// Threaded tracing. Just dump everything in the trace buffer
|
|
inline void chgBit(vluint32_t code, CData newval) {
|
|
m_traceBufferWritep[0] = VerilatedTraceCommand::CHG_BIT_0 | newval;
|
|
m_traceBufferWritep[1] = code;
|
|
m_traceBufferWritep += 2;
|
|
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
|
|
}
|
|
inline void chgCData(vluint32_t code, CData newval, int bits) {
|
|
m_traceBufferWritep[0] = (bits << 4) | VerilatedTraceCommand::CHG_CDATA;
|
|
m_traceBufferWritep[1] = code;
|
|
m_traceBufferWritep[2] = newval;
|
|
m_traceBufferWritep += 3;
|
|
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
|
|
}
|
|
inline void chgSData(vluint32_t code, SData newval, int bits) {
|
|
m_traceBufferWritep[0] = (bits << 4) | VerilatedTraceCommand::CHG_SDATA;
|
|
m_traceBufferWritep[1] = code;
|
|
m_traceBufferWritep[2] = newval;
|
|
m_traceBufferWritep += 3;
|
|
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
|
|
}
|
|
inline void chgIData(vluint32_t code, IData newval, int bits) {
|
|
m_traceBufferWritep[0] = (bits << 4) | VerilatedTraceCommand::CHG_IDATA;
|
|
m_traceBufferWritep[1] = code;
|
|
m_traceBufferWritep[2] = newval;
|
|
m_traceBufferWritep += 3;
|
|
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
|
|
}
|
|
inline void chgQData(vluint32_t code, QData newval, int bits) {
|
|
m_traceBufferWritep[0] = (bits << 4) | VerilatedTraceCommand::CHG_QDATA;
|
|
m_traceBufferWritep[1] = code;
|
|
*reinterpret_cast<QData*>(m_traceBufferWritep + 2) = newval;
|
|
m_traceBufferWritep += 4;
|
|
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
|
|
}
|
|
inline void chgWData(vluint32_t code, const WData* newvalp, int bits) {
|
|
m_traceBufferWritep[0] = (bits << 4) | VerilatedTraceCommand::CHG_WDATA;
|
|
m_traceBufferWritep[1] = code;
|
|
m_traceBufferWritep += 2;
|
|
for (int i = 0; i < (bits + 31) / 32; ++i) { *m_traceBufferWritep++ = newvalp[i]; }
|
|
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
|
|
}
|
|
inline void chgDouble(vluint32_t code, double newval) {
|
|
m_traceBufferWritep[0] = VerilatedTraceCommand::CHG_DOUBLE;
|
|
m_traceBufferWritep[1] = code;
|
|
// cppcheck-suppress invalidPointerCast
|
|
*reinterpret_cast<double*>(m_traceBufferWritep + 2) = newval;
|
|
m_traceBufferWritep += 4;
|
|
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
|
|
}
|
|
|
|
#define CHG(name) chg##name##Impl
|
|
#else
|
|
#define CHG(name) chg##name
|
|
#endif
|
|
|
|
// In non-threaded mode, these are called directly by the trace callbacks,
|
|
// and are called chg*. In threaded mode, they are called by the worker
|
|
// thread and are called chg*Impl
|
|
|
|
// Check previous dumped value of signal. If changed, then emit trace entry
|
|
inline void CHG(Bit)(vluint32_t* oldp, CData newval) {
|
|
const vluint32_t diff = *oldp ^ newval;
|
|
if (VL_UNLIKELY(diff)) fullBit(oldp, newval);
|
|
}
|
|
inline void CHG(CData)(vluint32_t* oldp, CData newval, int bits) {
|
|
const vluint32_t diff = *oldp ^ newval;
|
|
if (VL_UNLIKELY(diff)) fullCData(oldp, newval, bits);
|
|
}
|
|
inline void CHG(SData)(vluint32_t* oldp, SData newval, int bits) {
|
|
const vluint32_t diff = *oldp ^ newval;
|
|
if (VL_UNLIKELY(diff)) fullSData(oldp, newval, bits);
|
|
}
|
|
inline void CHG(IData)(vluint32_t* oldp, IData newval, int bits) {
|
|
const vluint32_t diff = *oldp ^ newval;
|
|
if (VL_UNLIKELY(diff)) fullIData(oldp, newval, bits);
|
|
}
|
|
inline void CHG(QData)(vluint32_t* oldp, QData newval, int bits) {
|
|
const vluint64_t diff = *reinterpret_cast<QData*>(oldp) ^ newval;
|
|
if (VL_UNLIKELY(diff)) fullQData(oldp, newval, bits);
|
|
}
|
|
inline void CHG(WData)(vluint32_t* oldp, const WData* newvalp, int bits) {
|
|
for (int i = 0; i < (bits + 31) / 32; ++i) {
|
|
if (VL_UNLIKELY(oldp[i] ^ newvalp[i])) {
|
|
fullWData(oldp, newvalp, bits);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
inline void CHG(Double)(vluint32_t* oldp, double newval) {
|
|
// cppcheck-suppress invalidPointerCast
|
|
if (VL_UNLIKELY(*reinterpret_cast<double*>(oldp) != newval)) fullDouble(oldp, newval);
|
|
}
|
|
|
|
#undef CHG
|
|
};
|
|
#endif // guard
|