verilator/include/verilated_trace.h
Geza Lore 900c023bb5 Refactor trace implementation to allow experimentation
The main goal of this patch is to enable splitting the full and
incremental tracing functions into multiple functions, which can then be
run in parallel at a later stage. It also simplifies further
experimentation as all of the interesting trace code construction now
happens in V3Trace. No functional change is intended by this patch, but
there are some implementation changes in the generated code.

Highlights:
- Pass symbol table directly to trace callbacks for simplicity.
- A new traceRegister function is generated which adds each trace
function as an individual callback, which means we can have multiple
callbacks for each trace function type.
- A new traceCleanup function is generated which clears the activity
flags, as the trace callbacks might be implemented as multiple functions.
- Re-worked sub-function handling so there is no separate sub-function
for each trace activity class. Sub-functions are generate when required
by splitting.
- traceFull/traceChg are now created in V3Trace rather than V3TraceDecl,
this requires carrying the trace value tree in TraceDecl until it
reaches V3Trace where the TraceInc nodes are created (previously a
TraceInc was also created in V3TraceDecl which carries the value).
2020-05-15 18:34:29 +01:00

410 lines
16 KiB
C++

// -*- mode: C++; c-file-style: "cc-mode" -*-
//=============================================================================
//
// THIS MODULE IS PUBLICLY LICENSED
//
// Copyright 2001-2020 by Wilson Snyder. This program is free software; you
// can redistribute it and/or modify it under the terms of either the GNU
// Lesser General Public License Version 3 or the Perl Artistic License
// Version 2.0.
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
//
//=============================================================================
///
/// \file
/// \brief Tracing functionality common to all formats
///
//=============================================================================
// SPDIFF_OFF
#ifndef _VERILATED_TRACE_H_
#define _VERILATED_TRACE_H_ 1
// clang-format off
#include "verilated.h"
#include <string>
#include <vector>
#ifdef VL_TRACE_THREADED
# include <condition_variable>
# include <deque>
# include <thread>
#endif
// clang-format on
#ifdef VL_TRACE_THREADED
//=============================================================================
// Threaded tracing
// A simple synchronized first in first out queue
template <class T> class VerilatedThreadQueue {
private:
VerilatedMutex m_mutex; // Protects m_queue
std::condition_variable_any m_cv;
std::deque<T> m_queue VL_GUARDED_BY(m_mutex);
public:
// Put an element at the back of the queue
void put(T value) {
VerilatedLockGuard lock(m_mutex);
m_queue.push_back(value);
m_cv.notify_one();
}
// Put an element at the front of the queue
void put_front(T value) {
VerilatedLockGuard lock(m_mutex);
m_queue.push_front(value);
m_cv.notify_one();
}
// Get an element from the front of the queue. Blocks if none available
T get() {
VerilatedLockGuard lock(m_mutex);
m_cv.wait(lock, [this]() VL_REQUIRES(m_mutex) { return !m_queue.empty(); });
assert(!m_queue.empty());
T value = m_queue.front();
m_queue.pop_front();
return value;
}
// Non blocking get
bool tryGet(T& result) {
VerilatedLockGuard lockGuard(m_mutex);
if (m_queue.empty()) { return false; }
result = m_queue.front();
m_queue.pop_front();
return true;
}
};
// Commands used by thread tracing. Anonymous enum in class, as we want
// it scoped, but we also want the automatic conversion to integer types.
class VerilatedTraceCommand {
public:
// These must all fit in 4 bit at the moment, as the tracing routines
// pack parameters in the top bits.
enum {
CHG_BIT_0 = 0x0,
CHG_BIT_1 = 0x1,
CHG_CDATA = 0x2,
CHG_SDATA = 0x3,
CHG_IDATA = 0x4,
CHG_QDATA = 0x5,
CHG_WDATA = 0x6,
CHG_FLOAT = 0x7,
CHG_DOUBLE = 0x8,
// TODO: full..
TIME_CHANGE = 0xd,
END = 0xe, // End of buffer
SHUTDOWN = 0xf // Shutdown worker thread, also marks end of buffer
};
};
#endif
//=============================================================================
// VerilatedTrace
// VerilatedTrace uses F-bounded polymorphism to access duck-typed
// implementations in the format specific derived class, which must be passed
// as the type parameter T_Derived
template <class T_Derived> class VerilatedTrace {
public:
//=========================================================================
// Generic tracing internals
typedef void (*initCb_t)(void*, T_Derived*, uint32_t); // Type of init callbacks
typedef void (*dumpCb_t)(void*, T_Derived*); // Type of all but init callbacks
private:
struct CallbackRecord {
const union {
initCb_t m_initCb; // The callback function
dumpCb_t m_dumpCb; // The callback function
};
void* const m_userp; // The user pointer to pass to the callback (the symbol table)
CallbackRecord(initCb_t cb, void* userp)
: m_initCb(cb)
, m_userp(userp) {}
CallbackRecord(dumpCb_t cb, void* userp)
: m_dumpCb(cb)
, m_userp(userp) {}
};
vluint32_t* m_sigs_oldvalp; ///< Old value store
vluint64_t m_timeLastDump; ///< Last time we did a dump
std::vector<CallbackRecord> m_initCbs; ///< Routines to initialize traciong
std::vector<CallbackRecord> m_fullCbs; ///< Routines to perform full dump
std::vector<CallbackRecord> m_chgCbs; ///< Routines to perform incremental dump
std::vector<CallbackRecord> m_cleanupCbs; ///< Routines to call at the end of dump
bool m_fullDump; ///< Whether a full dump is required on the next call to 'dump'
vluint32_t m_nextCode; ///< Next code number to assign
vluint32_t m_numSignals; ///< Number of distinct signals
vluint32_t m_maxBits; ///< Number of bits in the widest signal
std::string m_moduleName; ///< Name of module being trace initialized now
char m_scopeEscape;
double m_timeRes; ///< Time resolution (ns/ms etc)
double m_timeUnit; ///< Time units (ns/ms etc)
void addCallbackRecord(std::vector<CallbackRecord>& cbVec, CallbackRecord& cbRec);
// Equivalent to 'this' but is of the sub-type 'T_Derived*'. Use 'self()->'
// to access duck-typed functions to avoid a virtual function call.
T_Derived* self() { return static_cast<T_Derived*>(this); }
#ifdef VL_TRACE_THREADED
// Number of total trace buffers that have been allocated
vluint32_t m_numTraceBuffers;
// Size of trace buffers
size_t m_traceBufferSize;
// Buffers handed to worker for processing
VerilatedThreadQueue<vluint32_t*> m_buffersToWorker;
// Buffers returned from worker after processing
VerilatedThreadQueue<vluint32_t*> m_buffersFromWorker;
// Get a new trace buffer that can be populated. May block if none available
vluint32_t* getTraceBuffer();
// Write pointer into current buffer
vluint32_t* m_traceBufferWritep;
// End of trace buffer
vluint32_t* m_traceBufferEndp;
// The worker thread itself
std::unique_ptr<std::thread> m_workerThread;
// The function executed by the worker thread
void workerThreadMain();
// Wait until given buffer is placed in m_buffersFromWorker
void waitForBuffer(const vluint32_t* bufferp);
// Shut down and join worker, if it's running, otherwise do nothing
void shutdownWorker();
#endif
// CONSTRUCTORS
VL_UNCOPYABLE(VerilatedTrace);
protected:
//=========================================================================
// Internals available to format specific implementations
VerilatedAssertOneThread m_assertOne; ///< Assert only called from single thread
vluint32_t nextCode() const { return m_nextCode; }
vluint32_t numSignals() const { return m_numSignals; }
vluint32_t maxBits() const { return m_maxBits; }
const std::string& moduleName() const { return m_moduleName; }
void fullDump(bool value) { m_fullDump = value; }
vluint64_t timeLastDump() { return m_timeLastDump; }
double timeRes() const { return m_timeRes; }
double timeUnit() const { return m_timeUnit; }
std::string timeResStr() const;
std::string timeUnitStr() const;
void traceInit() VL_MT_UNSAFE;
void declCode(vluint32_t code, vluint32_t bits, bool tri);
/// Is this an escape?
bool isScopeEscape(char c) { return isspace(c) || c == m_scopeEscape; }
/// Character that splits scopes. Note whitespace are ALWAYS escapes.
char scopeEscape() { return m_scopeEscape; }
void close();
void flush();
//=========================================================================
// Virtual functions to be provided by the format specific implementation
// Called when the trace moves forward to a new time point
virtual void emitTimeChange(vluint64_t timeui) = 0;
// These hooks are called before a full or change based dump is produced.
// The return value indicates whether to proceed with the dump.
virtual bool preFullDump() { return true; }
virtual bool preChangeDump() { return true; }
public:
//=========================================================================
// External interface to client code
explicit VerilatedTrace();
~VerilatedTrace();
// Set time units (s/ms, defaults to ns)
void set_time_unit(const char* unitp);
void set_time_unit(const std::string& unit);
// Set time resolution (s/ms, defaults to ns)
void set_time_resolution(const char* unitp);
void set_time_resolution(const std::string& unit);
// Call
void dump(vluint64_t timeui);
//=========================================================================
// Non-hot path internal interface to Verilator generated code
void addInitCb(initCb_t cb, void* userp) VL_MT_UNSAFE_ONE;
void addFullCb(dumpCb_t cb, void* userp) VL_MT_UNSAFE_ONE;
void addChgCb(dumpCb_t cb, void* userp) VL_MT_UNSAFE_ONE;
void addCleanupCb(dumpCb_t cb, void* userp) VL_MT_UNSAFE_ONE;
void changeThread() { m_assertOne.changeThread(); }
void module(const std::string& name) VL_MT_UNSAFE_ONE {
m_assertOne.check();
m_moduleName = name;
}
void scopeEscape(char flag) { m_scopeEscape = flag; }
//=========================================================================
// Hot path internal interface to Verilator generated code
// Implementation note: We rely on the following duck-typed implementations
// in the derived class T_Derived. These emit* functions record a format
// specific trace entry. Normally one would use pure virtual functions for
// these here, but we cannot afford dynamic dispatch for calling these as
// this is very hot code during tracing.
// duck-typed void emitBit(vluint32_t code, CData newval) = 0;
// duck-typed void emitCData(vluint32_t code, CData newval, int bits) = 0;
// duck-typed void emitSData(vluint32_t code, SData newval, int bits) = 0;
// duck-typed void emitIData(vluint32_t code, IData newval, int bits) = 0;
// duck-typed void emitQData(vluint32_t code, QData newval, int bits) = 0;
// duck-typed void emitWData(vluint32_t code, const WData* newvalp, int bits) = 0;
// duck-typed void emitFloat(vluint32_t code, float newval) = 0;
// duck-typed void emitDouble(vluint32_t code, double newval) = 0;
vluint32_t* oldp(vluint32_t code) { return m_sigs_oldvalp + code; }
// Write to previous value buffer value and emit trace entry.
void fullBit(vluint32_t* oldp, CData newval);
void fullCData(vluint32_t* oldp, CData newval, int bits);
void fullSData(vluint32_t* oldp, SData newval, int bits);
void fullIData(vluint32_t* oldp, IData newval, int bits);
void fullQData(vluint32_t* oldp, QData newval, int bits);
void fullWData(vluint32_t* oldp, const WData* newvalp, int bits);
void fullFloat(vluint32_t* oldp, float newval);
void fullDouble(vluint32_t* oldp, double newval);
#ifdef VL_TRACE_THREADED
// Threaded tracing. Just dump everything in the trace buffer
inline void chgBit(vluint32_t code, CData newval) {
m_traceBufferWritep[0] = VerilatedTraceCommand::CHG_BIT_0 | newval;
m_traceBufferWritep[1] = code;
m_traceBufferWritep += 2;
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
}
inline void chgCData(vluint32_t code, CData newval, int bits) {
m_traceBufferWritep[0] = (bits << 4) | VerilatedTraceCommand::CHG_CDATA;
m_traceBufferWritep[1] = code;
m_traceBufferWritep[2] = newval;
m_traceBufferWritep += 3;
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
}
inline void chgSData(vluint32_t code, SData newval, int bits) {
m_traceBufferWritep[0] = (bits << 4) | VerilatedTraceCommand::CHG_SDATA;
m_traceBufferWritep[1] = code;
m_traceBufferWritep[2] = newval;
m_traceBufferWritep += 3;
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
}
inline void chgIData(vluint32_t code, IData newval, int bits) {
m_traceBufferWritep[0] = (bits << 4) | VerilatedTraceCommand::CHG_IDATA;
m_traceBufferWritep[1] = code;
m_traceBufferWritep[2] = newval;
m_traceBufferWritep += 3;
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
}
inline void chgQData(vluint32_t code, QData newval, int bits) {
m_traceBufferWritep[0] = (bits << 4) | VerilatedTraceCommand::CHG_QDATA;
m_traceBufferWritep[1] = code;
*reinterpret_cast<QData*>(m_traceBufferWritep + 2) = newval;
m_traceBufferWritep += 4;
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
}
inline void chgWData(vluint32_t code, const WData* newvalp, int bits) {
m_traceBufferWritep[0] = (bits << 4) | VerilatedTraceCommand::CHG_WDATA;
m_traceBufferWritep[1] = code;
m_traceBufferWritep += 2;
for (int i = 0; i < (bits + 31) / 32; ++i) { *m_traceBufferWritep++ = newvalp[i]; }
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
}
inline void chgFloat(vluint32_t code, float newval) {
m_traceBufferWritep[0] = VerilatedTraceCommand::CHG_FLOAT;
m_traceBufferWritep[1] = code;
// cppcheck-suppress invalidPointerCast
*reinterpret_cast<float*>(m_traceBufferWritep + 2) = newval;
m_traceBufferWritep += 3;
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
}
inline void chgDouble(vluint32_t code, double newval) {
m_traceBufferWritep[0] = VerilatedTraceCommand::CHG_DOUBLE;
m_traceBufferWritep[1] = code;
// cppcheck-suppress invalidPointerCast
*reinterpret_cast<double*>(m_traceBufferWritep + 2) = newval;
m_traceBufferWritep += 4;
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
}
#define CHG(name) chg##name##Impl
#else
#define CHG(name) chg##name
#endif
// In non-threaded mode, these are called directly by the trace callbacks,
// and are called chg*. In threaded mode, they are called by the worker
// thread and are called chg*Impl
// Check previous dumped value of signal. If changed, then emit trace entry
inline void CHG(Bit)(vluint32_t* oldp, CData newval) {
const vluint32_t diff = *oldp ^ newval;
if (VL_UNLIKELY(diff)) fullBit(oldp, newval);
}
inline void CHG(CData)(vluint32_t* oldp, CData newval, int bits) {
const vluint32_t diff = *oldp ^ newval;
if (VL_UNLIKELY(diff)) fullCData(oldp, newval, bits);
}
inline void CHG(SData)(vluint32_t* oldp, SData newval, int bits) {
const vluint32_t diff = *oldp ^ newval;
if (VL_UNLIKELY(diff)) fullSData(oldp, newval, bits);
}
inline void CHG(IData)(vluint32_t* oldp, IData newval, int bits) {
const vluint32_t diff = *oldp ^ newval;
if (VL_UNLIKELY(diff)) fullIData(oldp, newval, bits);
}
inline void CHG(QData)(vluint32_t* oldp, QData newval, int bits) {
const vluint64_t diff = *reinterpret_cast<QData*>(oldp) ^ newval;
if (VL_UNLIKELY(diff)) fullQData(oldp, newval, bits);
}
inline void CHG(WData)(vluint32_t* oldp, const WData* newvalp, int bits) {
for (int i = 0; i < (bits + 31) / 32; ++i) {
if (VL_UNLIKELY(oldp[i] ^ newvalp[i])) {
fullWData(oldp, newvalp, bits);
return;
}
}
}
inline void CHG(Float)(vluint32_t* oldp, float newval) {
// cppcheck-suppress invalidPointerCast
if (VL_UNLIKELY(*reinterpret_cast<float*>(oldp) != newval)) fullFloat(oldp, newval);
}
inline void CHG(Double)(vluint32_t* oldp, double newval) {
// cppcheck-suppress invalidPointerCast
if (VL_UNLIKELY(*reinterpret_cast<double*>(oldp) != newval)) fullDouble(oldp, newval);
}
#undef CHG
};
#endif // guard