mirror of
https://github.com/verilator/verilator.git
synced 2025-01-08 15:47:36 +00:00
Merge from master
This commit is contained in:
commit
489f58011b
@ -159,7 +159,11 @@ void VL_DBG_MSGF(const char* formatp, ...) VL_MT_SAFE {
|
||||
std::string out = _vl_string_vprintf(formatp, ap);
|
||||
va_end(ap);
|
||||
// printf("-imm-V{t%d,%" VL_PRI64 "d}%s", VL_THREAD_ID(), _vl_dbg_sequence_number(), out.c_str());
|
||||
VL_PRINTF_MT("-V{t%d,%" VL_PRI64 "d}%s", VL_THREAD_ID(), _vl_dbg_sequence_number(), out.c_str());
|
||||
|
||||
// Using VL_PRINTF not VL_PRINTF_MT so that we can call VL_DBG_MSGF
|
||||
// from within the guts of the thread execution machinery (and it goes
|
||||
// to the screen and not into the queues we're debugging)
|
||||
VL_PRINTF("-V{t%d,%" VL_PRI64 "d}%s", VL_THREAD_ID(), _vl_dbg_sequence_number(), out.c_str());
|
||||
}
|
||||
|
||||
#ifdef VL_THREADED
|
||||
@ -1599,7 +1603,8 @@ std::string VL_CVT_PACK_STR_NW(int lwords, WDataInP lwp) VL_MT_SAFE {
|
||||
Verilated::ThreadLocal::ThreadLocal()
|
||||
:
|
||||
#ifdef VL_THREADED
|
||||
t_trainId(0),
|
||||
t_mtaskId(0),
|
||||
t_endOfEvalReqd(0),
|
||||
#endif
|
||||
t_dpiScopep(NULL), t_dpiFilename(0), t_dpiLineno(0) {
|
||||
}
|
||||
@ -1734,8 +1739,8 @@ const VerilatedScopeNameMap* Verilated::scopeNameMap() VL_MT_SAFE {
|
||||
}
|
||||
|
||||
#ifdef VL_THREADED
|
||||
void Verilated::endOfThreadTrainGuts(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE {
|
||||
VL_DEBUG_IF(VL_DBG_MSGF("End of thread train\n"););
|
||||
/// Out-of-line part of Verilated::endOfThreadMTask().
/// Emits an "End of thread mtask" debug trace through VL_DEBUG_IF and then
/// flushes this thread's message queue into the eval message queue.
/// @param evalMsgQp  Eval message queue handed to VerilatedThreadMsgQueue::flush()
void Verilated::endOfThreadMTaskGuts(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE {
    VL_DEBUG_IF(VL_DBG_MSGF("End of thread mtask\n"););
    VerilatedThreadMsgQueue::flush(evalMsgQp);
}
|
||||
|
||||
|
@ -111,6 +111,8 @@ extern vluint32_t VL_THREAD_ID() VL_MT_SAFE;
|
||||
|
||||
#if VL_THREADED
|
||||
|
||||
#define VL_LOCK_SPINS 50000 /// Number of times to spin for a mutex before relaxing
|
||||
|
||||
/// Mutex, wrapped to allow -fthread_safety checks
|
||||
class VL_CAPABILITY("mutex") VerilatedMutex {
|
||||
private:
|
||||
@ -119,9 +121,19 @@ class VL_CAPABILITY("mutex") VerilatedMutex {
|
||||
VerilatedMutex() {}
|
||||
~VerilatedMutex() {}
|
||||
/// Acquire/lock mutex
|
||||
void lock() VL_ACQUIRE() { m_mutex.lock(); }
|
||||
/// Acquire/lock mutex.
/// Spins on try_lock() for up to VL_LOCK_SPINS attempts before falling back
/// to a blocking lock. If the wait is short, spinning avoids a trap to the
/// OS plus OS scheduler overhead.
void lock() VL_ACQUIRE() {
    // Uncontended fast path: skip the spin loop setup entirely
    if (VL_LIKELY(try_lock())) return;
    int spinsLeft = VL_LOCK_SPINS;
    while (spinsLeft-- > 0) {
        if (VL_LIKELY(try_lock())) return;
        // Tell the CPU we are busy-waiting between attempts
        VL_CPU_RELAX();
    }
    // Spinning hasn't worked, pay the cost of blocking.
    m_mutex.lock();
}
|
||||
/// Release/unlock mutex
|
||||
void unlock() VL_RELEASE() { m_mutex.unlock(); }
|
||||
void unlock() VL_RELEASE() { m_mutex.unlock(); }
|
||||
/// Try to acquire mutex. Returns true on success, and false on failure.
|
||||
bool try_lock() VL_TRY_ACQUIRE(true) { return m_mutex.try_lock(); }
|
||||
};
|
||||
@ -143,14 +155,21 @@ class VL_SCOPED_CAPABILITY VerilatedLockGuard {
|
||||
|
||||
#else // !VL_THREADED
|
||||
|
||||
// Empty classes to avoid #ifdefs everywhere
|
||||
class VerilatedMutex {};
|
||||
/// Empty non-threaded mutex to avoid #ifdefs in consuming code.
/// Offers the same lock()/unlock()/try_lock() calls as the threaded
/// VerilatedMutex; every operation does nothing because this class holds
/// no state.
class VerilatedMutex {
public:
    /// Acquire/lock mutex (no-op)
    void lock() {}
    /// Release/unlock mutex (no-op)
    void unlock() {}
    /// Try to acquire mutex. Always returns true, as there is nothing to contend on.
    bool try_lock() { return true; }
};
|
||||
|
||||
/// Empty non-threaded lock guard to avoid #ifdefs in consuming code
|
||||
class VerilatedLockGuard {
|
||||
VL_UNCOPYABLE(VerilatedLockGuard);
|
||||
public:
|
||||
explicit VerilatedLockGuard(VerilatedMutex&) {}
|
||||
~VerilatedLockGuard() {}
|
||||
};
|
||||
|
||||
#endif // VL_THREADED
|
||||
|
||||
/// Remember the calling thread at construction time, and make sure later calls use same thread
|
||||
@ -336,7 +355,7 @@ class Verilated {
|
||||
// Not covered by mutex, as per-thread
|
||||
static VL_THREAD_LOCAL struct ThreadLocal {
|
||||
#ifdef VL_THREADED
|
||||
vluint32_t t_trainId; ///< Current train# executing on this thread
|
||||
vluint32_t t_mtaskId; ///< Current mtask# executing on this thread
|
||||
vluint32_t t_endOfEvalReqd; ///< Messages may be pending, thread needs endOf-eval calls
|
||||
#endif
|
||||
const VerilatedScope* t_dpiScopep; ///< DPI context scope
|
||||
@ -455,22 +474,29 @@ public:
|
||||
static size_t serializedSize() VL_PURE { return sizeof(s_s); }
|
||||
static void* serializedPtr() VL_MT_UNSAFE { return &s_s; } // Unsafe, for Serialize only
|
||||
#ifdef VL_THREADED
|
||||
/// Set the trainId, called when a train starts
|
||||
static void trainId(vluint32_t id) VL_MT_SAFE { t_s.t_trainId = id; }
|
||||
static vluint32_t trainId() VL_MT_SAFE { return t_s.t_trainId; }
|
||||
/// Set the mtaskId, called when an mtask starts
|
||||
static void mtaskId(vluint32_t id) VL_MT_SAFE { t_s.t_mtaskId = id; }
|
||||
static vluint32_t mtaskId() VL_MT_SAFE { return t_s.t_mtaskId; }
|
||||
static void endOfEvalReqdInc() VL_MT_SAFE { ++t_s.t_endOfEvalReqd; }
|
||||
static void endOfEvalReqdDec() VL_MT_SAFE { --t_s.t_endOfEvalReqd; }
|
||||
/// Called at end of each thread train, before finishing eval
|
||||
static void endOfThreadTrain(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE {
|
||||
if (VL_UNLIKELY(t_s.t_endOfEvalReqd)) { endOfThreadTrainGuts(evalMsgQp); } }
|
||||
|
||||
/// Called at end of each thread mtask, before finishing eval
|
||||
static void endOfThreadMTask(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE {
|
||||
if (VL_UNLIKELY(t_s.t_endOfEvalReqd)) { endOfThreadMTaskGuts(evalMsgQp); }
|
||||
}
|
||||
/// Called at end of eval loop
|
||||
static void endOfEval(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE {
|
||||
if (VL_UNLIKELY(t_s.t_endOfEvalReqd)) { endOfEvalGuts(evalMsgQp); } }
|
||||
// It doesn't work to set endOfEvalReqd on the threadpool thread
|
||||
// and then check it on the eval thread since it's thread local.
|
||||
// It should be ok to call into endOfEvalGuts, it returns immediately
|
||||
// if there are no transactions.
|
||||
endOfEvalGuts(evalMsgQp);
|
||||
}
|
||||
#endif
|
||||
|
||||
private:
|
||||
#ifdef VL_THREADED
|
||||
static void endOfThreadTrainGuts(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE;
|
||||
static void endOfThreadMTaskGuts(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE;
|
||||
static void endOfEvalGuts(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE;
|
||||
#endif
|
||||
};
|
||||
@ -527,6 +553,11 @@ extern QData VL_RAND_RESET_Q(int obits); ///< Random reset a signal
|
||||
extern WDataOutP VL_RAND_RESET_W(int obits, WDataOutP outwp); ///< Random reset a signal
|
||||
extern WDataOutP VL_ZERO_RESET_W(int obits, WDataOutP outwp); ///< Zero reset a signal (slow - else use VL_ZERO_W)
|
||||
|
||||
#if VL_THREADED
/// Return high-precision counter for profiling, or 0x0 if not available.
/// Function-style wrapper around the VL_RDTSC() macro.
inline QData VL_RDTSC_Q() {
    vluint64_t ticks;
    VL_RDTSC(ticks);  // Macro stores the counter into 'ticks'
    return ticks;
}
#endif
|
||||
|
||||
/// Math
|
||||
extern WDataOutP _vl_moddiv_w(int lbits, WDataOutP owp, WDataInP lwp, WDataInP rwp, bool is_modulus);
|
||||
|
||||
|
@ -49,25 +49,25 @@ class VerilatedScope;
|
||||
// Threaded message passing
|
||||
|
||||
#ifdef VL_THREADED
|
||||
/// Message, enqueued on a train, and consumed on the main eval thread
|
||||
/// Message, enqueued on an mtask, and consumed on the main eval thread
|
||||
class VerilatedMsg {
|
||||
public:
|
||||
// TYPES
|
||||
struct Cmp {
|
||||
bool operator() (const VerilatedMsg& a, const VerilatedMsg& b) const {
|
||||
return a.trainId() < b.trainId(); }
|
||||
return a.mtaskId() < b.mtaskId(); }
|
||||
};
|
||||
private:
|
||||
// MEMBERS
|
||||
vluint32_t m_trainId; ///< Train that did enqueue
|
||||
vluint32_t m_mtaskId; ///< MTask that did enqueue
|
||||
std::function<void()> m_cb; ///< Lambda to execute when message received
|
||||
public:
|
||||
// CONSTRUCTORS
|
||||
VerilatedMsg(const std::function<void()>& cb)
|
||||
: m_trainId(Verilated::trainId()), m_cb(cb) {}
|
||||
: m_mtaskId(Verilated::mtaskId()), m_cb(cb) {}
|
||||
~VerilatedMsg() {}
|
||||
// METHODS
|
||||
vluint32_t trainId() const { return m_trainId; }
|
||||
vluint32_t mtaskId() const { return m_mtaskId; }
|
||||
/// Execute the lambda function
|
||||
void run() const { m_cb(); }
|
||||
};
|
||||
@ -84,7 +84,9 @@ class VerilatedEvalMsgQueue {
|
||||
VerilatedThreadQueue m_queue VL_GUARDED_BY(m_mutex); ///< Message queue
|
||||
public:
|
||||
// CONSTRUCTORS
|
||||
VerilatedEvalMsgQueue() : m_depth(0) { }
|
||||
VerilatedEvalMsgQueue() : m_depth(0) {
|
||||
assert(atomic_is_lock_free(&m_depth));
|
||||
}
|
||||
~VerilatedEvalMsgQueue() { }
|
||||
private:
|
||||
VL_UNCOPYABLE(VerilatedEvalMsgQueue);
|
||||
@ -92,7 +94,6 @@ public:
|
||||
// METHODS
|
||||
//// Add message to queue (called by producer)
|
||||
void post(const VerilatedMsg& msg) VL_EXCLUDES(m_mutex) {
|
||||
Verilated::endOfEvalReqdInc(); // No mutex, threadsafe
|
||||
VerilatedLockGuard guard(m_mutex);
|
||||
m_queue.insert(msg); // Pass by value to copy the message into queue
|
||||
++m_depth;
|
||||
@ -114,10 +115,9 @@ public:
|
||||
m_queue.erase(it);
|
||||
m_mutex.unlock();
|
||||
m_depth--; // Ok if outside critical section as only this code checks the value
|
||||
Verilated::endOfEvalReqdDec(); // No mutex, threadsafe
|
||||
{
|
||||
VL_DEBUG_IF(VL_DBG_MSGF("Executing callback from trainId=%d\n", msg.trainId()););
|
||||
msg.run();
|
||||
VL_DEBUG_IF(VL_DBG_MSGF("Executing callback from mtaskId=%d\n", msg.mtaskId()););
|
||||
msg.run();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -143,8 +143,15 @@ private:
|
||||
public:
|
||||
/// Add message to queue, called by producer
|
||||
static void post(const VerilatedMsg& msg) VL_MT_SAFE {
|
||||
Verilated::endOfEvalReqdInc();
|
||||
threadton().m_queue.push(msg); // Pass by value to copy the message into queue
|
||||
// Handle calls to threaded routines outside
|
||||
// of any mtask -- if an initial block calls $finish, say.
|
||||
if (Verilated::mtaskId() == 0) {
|
||||
// No queueing, just do the action immediately
|
||||
msg.run();
|
||||
} else {
|
||||
Verilated::endOfEvalReqdInc();
|
||||
threadton().m_queue.push(msg); // Pass by value to copy the message into queue
|
||||
}
|
||||
}
|
||||
/// Push all messages to the eval's queue
|
||||
static void flush(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE {
|
||||
|
@ -353,6 +353,41 @@ typedef unsigned long long vluint64_t; ///< 64-bit unsigned type
|
||||
# define VL_ROUND(n) round(n)
|
||||
#endif
|
||||
|
||||
//=========================================================================
|
||||
// Performance counters
|
||||
|
||||
#if VL_THREADED
/// VL_RDTSC(val): the vluint64_t argument is loaded with a high-performance
/// counter for profiling, or 0x0 if not implemented on this platform.
/// Every variant expands to one statement WITHOUT a trailing semicolon, so
/// call sites write "VL_RDTSC(val);" and it is safe in an unbraced if/else.
# if defined(__i386__) || defined(__x86_64__)
// RDTSC returns the counter split across EDX:EAX. The "=A" constraint names
// that register pair only on 32-bit x86; on x86-64 it means "RAX or RDX", so
// the two halves are read explicitly and combined.
#  define VL_RDTSC(val) \
    do { \
        vluint32_t vl_rdtsc_lo_; \
        vluint32_t vl_rdtsc_hi_; \
        asm volatile("rdtsc" : "=a" (vl_rdtsc_lo_), "=d" (vl_rdtsc_hi_)); \
        (val) = (static_cast<vluint64_t>(vl_rdtsc_hi_) << 32) | vl_rdtsc_lo_; \
    } while (0)
# elif defined(__aarch64__)
// NOTE(review): PMCCNTR_EL0 is normally readable from user space only when the
// kernel has enabled EL0 access to the PMU -- confirm on the target systems.
#  define VL_RDTSC(val) asm volatile("mrs %[rt],PMCCNTR_EL0" : [rt] "=r" (val))
# else
// We just silently ignore unknown OSes, as only leads to missing statistics
#  define VL_RDTSC(val) (val) = 0
# endif
#endif
|
||||
|
||||
//=========================================================================
|
||||
// Threading related OS-specific functions
|
||||
|
||||
#if VL_THREADED
/// VL_CPU_RELAX(): for more efficient busy waiting on SMT CPUs, let the
/// processor know we're just waiting so it can let another thread run.
/// Each variant also lists "memory" as a clobber.
# if defined(__i386__) || defined(__x86_64__)
#  define VL_CPU_RELAX() asm volatile("rep; nop" ::: "memory")
# elif defined(__ia64__)
#  define VL_CPU_RELAX() asm volatile("hint @pause" ::: "memory")
# elif defined(__aarch64__)
#  define VL_CPU_RELAX() asm volatile("yield" ::: "memory")
# elif defined(__powerpc64__)
#  define VL_CPU_RELAX() asm volatile("or 1, 1, 1; or 2, 2, 2;" ::: "memory")
# else
// Must be "#else": an "#elif" without an expression is itself a preprocessor
// error, which would hide the intended diagnostic below.
#  error "Missing VL_CPU_RELAX() definition. Or, don't use VL_THREADED"
# endif
#endif
|
||||
|
||||
//=========================================================================
|
||||
|
||||
#endif /*guard*/
|
||||
|
@ -1783,17 +1783,17 @@ void EmitCImp::emitWrapEval(AstNodeModule* modp) {
|
||||
}
|
||||
|
||||
if (v3Global.opt.threads()) { // THREADED-TODO move to per-train
|
||||
uint32_t trainId = 0;
|
||||
putsDecoration("// Train "+cvtToStr(trainId)+" start\n");
|
||||
puts("VL_DEBUG_IF(VL_DBG_MSGF(\"Train starting, trainId="+cvtToStr(trainId)+"\\n\"););\n");
|
||||
puts("Verilated::trainId("+cvtToStr(trainId)+");\n");
|
||||
uint32_t mtaskId = 0;
|
||||
putsDecoration("// MTask "+cvtToStr(mtaskId)+" start\n");
|
||||
puts("VL_DEBUG_IF(VL_DBG_MSGF(\"MTask starting, mtaskId="+cvtToStr(mtaskId)+"\\n\"););\n");
|
||||
puts("Verilated::mtaskId("+cvtToStr(mtaskId)+");\n");
|
||||
}
|
||||
emitSettleLoop(
|
||||
(string("VL_DEBUG_IF(VL_DBG_MSGF(\"+ Clock loop\\n\"););\n")
|
||||
+ (v3Global.opt.trace() ? "vlSymsp->__Vm_activity = true;\n" : "")
|
||||
+ "_eval(vlSymsp);"), false);
|
||||
if (v3Global.opt.threads()) { // THREADED-TODO move to end of all trains on thread
|
||||
puts("Verilated::endOfThreadTrain(vlSymsp->__Vm_evalMsgQp);\n");
|
||||
if (v3Global.opt.threads()) { // THREADED-TODO move to end of all mtasks on thread
|
||||
puts("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);\n");
|
||||
}
|
||||
if (v3Global.opt.threads()) {
|
||||
puts("Verilated::endOfEval(vlSymsp->__Vm_evalMsgQp);\n");
|
||||
|
Loading…
Reference in New Issue
Block a user