Merge from master

This commit is contained in:
Wilson Snyder 2018-05-08 21:43:55 -04:00
commit 489f58011b
5 changed files with 113 additions and 35 deletions

View File

@@ -159,7 +159,11 @@ void VL_DBG_MSGF(const char* formatp, ...) VL_MT_SAFE {
std::string out = _vl_string_vprintf(formatp, ap);
va_end(ap);
// printf("-imm-V{t%d,%" VL_PRI64 "d}%s", VL_THREAD_ID(), _vl_dbg_sequence_number(), out.c_str());
VL_PRINTF_MT("-V{t%d,%" VL_PRI64 "d}%s", VL_THREAD_ID(), _vl_dbg_sequence_number(), out.c_str());
// Using VL_PRINTF not VL_PRINTF_MT so that we can call VL_DBG_MSGF
// from within the guts of the thread execution machinery (and it goes
// to the screen and not into the queues we're debugging)
VL_PRINTF("-V{t%d,%" VL_PRI64 "d}%s", VL_THREAD_ID(), _vl_dbg_sequence_number(), out.c_str());
}
#ifdef VL_THREADED
@@ -1599,7 +1603,8 @@ std::string VL_CVT_PACK_STR_NW(int lwords, WDataInP lwp) VL_MT_SAFE {
Verilated::ThreadLocal::ThreadLocal()
:
#ifdef VL_THREADED
t_trainId(0),
t_mtaskId(0),
t_endOfEvalReqd(0),
#endif
t_dpiScopep(NULL), t_dpiFilename(0), t_dpiLineno(0) {
}
@@ -1734,8 +1739,8 @@ const VerilatedScopeNameMap* Verilated::scopeNameMap() VL_MT_SAFE {
}
#ifdef VL_THREADED
void Verilated::endOfThreadTrainGuts(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE {
VL_DEBUG_IF(VL_DBG_MSGF("End of thread train\n"););
void Verilated::endOfThreadMTaskGuts(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE {
VL_DEBUG_IF(VL_DBG_MSGF("End of thread mtask\n"););
VerilatedThreadMsgQueue::flush(evalMsgQp);
}

View File

@@ -111,6 +111,8 @@ extern vluint32_t VL_THREAD_ID() VL_MT_SAFE;
#if VL_THREADED
#define VL_LOCK_SPINS 50000 /// Number of times to spin for a mutex before blocking
/// Mutex, wrapped to allow -fthread_safety checks
class VL_CAPABILITY("mutex") VerilatedMutex {
private:
@@ -119,9 +121,19 @@ class VL_CAPABILITY("mutex") VerilatedMutex {
VerilatedMutex() {}
~VerilatedMutex() {}
/// Acquire/lock mutex
void lock() VL_ACQUIRE() { m_mutex.lock(); }
void lock() VL_ACQUIRE() {
// Try to acquire the lock by spinning. If the wait is short, this
// avoids a trap into the OS plus the OS scheduler overhead.
if (VL_LIKELY(try_lock())) return; // Short circuit loop
for (int i = 0; i < VL_LOCK_SPINS; ++i) {
if (VL_LIKELY(try_lock())) return;
VL_CPU_RELAX();
}
// Spinning hasn't worked, pay the cost of blocking.
m_mutex.lock();
}
/// Release/unlock mutex
void unlock() VL_RELEASE() { m_mutex.unlock(); }
void unlock() VL_RELEASE() { m_mutex.unlock(); }
/// Try to acquire mutex. Returns true on success, and false on failure.
bool try_lock() VL_TRY_ACQUIRE(true) { return m_mutex.try_lock(); }
};
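
For illustration, the same spin-then-block idea as a self-contained sketch. It assumes std::mutex and a hypothetical cpu_relax() placeholder standing in for VL_CPU_RELAX(); this is just the pattern the hunk above applies, not the Verilator implementation itself.

#include <mutex>

// Hypothetical stand-in for VL_CPU_RELAX(); on x86 this would be "rep; nop" (pause).
static inline void cpu_relax() {}

class SpinThenBlockMutex {
    enum { SPINS = 50000 };  // Mirrors VL_LOCK_SPINS
    std::mutex m_mutex;
public:
    void lock() {
        // Fast path: if the holder releases quickly, spinning avoids a
        // syscall and a trip through the OS scheduler.
        if (m_mutex.try_lock()) return;
        for (int i = 0; i < SPINS; ++i) {
            if (m_mutex.try_lock()) return;
            cpu_relax();
        }
        // Spinning did not pay off; fall back to a blocking acquire.
        m_mutex.lock();
    }
    void unlock() { m_mutex.unlock(); }
    bool try_lock() { return m_mutex.try_lock(); }
};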
@@ -143,14 +155,21 @@ class VL_SCOPED_CAPABILITY VerilatedLockGuard {
#else // !VL_THREADED
// Empty classes to avoid #ifdefs everywhere
class VerilatedMutex {};
/// Empty non-threaded mutex to avoid #ifdefs in consuming code
class VerilatedMutex {
public:
void lock() {}
void unlock() {}
};
/// Empty non-threaded lock guard to avoid #ifdefs in consuming code
class VerilatedLockGuard {
VL_UNCOPYABLE(VerilatedLockGuard);
public:
explicit VerilatedLockGuard(VerilatedMutex&) {}
~VerilatedLockGuard() {}
};
#endif // VL_THREADED
/// Remember the calling thread at construction time, and make sure later calls use same thread
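
The point of the non-threaded stubs in the hunk above is that consuming code can lock unconditionally. A hedged sketch of such a call site (ExampleModel and its members are illustrative, not part of this change):

// Compiles the same way with or without VL_THREADED: in the non-threaded
// build the mutex and guard are empty and optimize away.
class ExampleModel {
    VerilatedMutex m_mutex;  // Real mutex if VL_THREADED, no-op stub otherwise
    int m_count;
public:
    ExampleModel() : m_count(0) {}
    void increment() {
        VerilatedLockGuard guard(m_mutex);  // No #ifdef needed at the call site
        ++m_count;
    }
};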
@@ -336,7 +355,7 @@ class Verilated {
// Not covered by mutex, as per-thread
static VL_THREAD_LOCAL struct ThreadLocal {
#ifdef VL_THREADED
vluint32_t t_trainId; ///< Current train# executing on this thread
vluint32_t t_mtaskId; ///< Current mtask# executing on this thread
vluint32_t t_endOfEvalReqd; ///< Messages may be pending, thread needs endOf-eval calls
#endif
const VerilatedScope* t_dpiScopep; ///< DPI context scope
@@ -455,22 +474,29 @@ public:
static size_t serializedSize() VL_PURE { return sizeof(s_s); }
static void* serializedPtr() VL_MT_UNSAFE { return &s_s; } // Unsafe, for Serialize only
#ifdef VL_THREADED
/// Set the trainId, called when a train starts
static void trainId(vluint32_t id) VL_MT_SAFE { t_s.t_trainId = id; }
static vluint32_t trainId() VL_MT_SAFE { return t_s.t_trainId; }
/// Set the mtaskId, called when an mtask starts
static void mtaskId(vluint32_t id) VL_MT_SAFE { t_s.t_mtaskId = id; }
static vluint32_t mtaskId() VL_MT_SAFE { return t_s.t_mtaskId; }
static void endOfEvalReqdInc() VL_MT_SAFE { ++t_s.t_endOfEvalReqd; }
static void endOfEvalReqdDec() VL_MT_SAFE { --t_s.t_endOfEvalReqd; }
/// Called at end of each thread train, before finishing eval
static void endOfThreadTrain(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE {
if (VL_UNLIKELY(t_s.t_endOfEvalReqd)) { endOfThreadTrainGuts(evalMsgQp); } }
/// Called at end of each thread mtask, before finishing eval
static void endOfThreadMTask(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE {
if (VL_UNLIKELY(t_s.t_endOfEvalReqd)) { endOfThreadMTaskGuts(evalMsgQp); }
}
/// Called at end of eval loop
static void endOfEval(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE {
if (VL_UNLIKELY(t_s.t_endOfEvalReqd)) { endOfEvalGuts(evalMsgQp); } }
// It doesn't work to set endOfEvalReqd on the threadpool thread
// and then check it on the eval thread since it's thread local.
// It should be ok to call into endOfEvalGuts; it returns immediately
// if there are no transactions.
endOfEvalGuts(evalMsgQp);
}
#endif
private:
#ifdef VL_THREADED
static void endOfThreadTrainGuts(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE;
static void endOfThreadMTaskGuts(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE;
static void endOfEvalGuts(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE;
#endif
};
@@ -527,6 +553,11 @@ extern QData VL_RAND_RESET_Q(int obits); ///< Random reset a signal
extern WDataOutP VL_RAND_RESET_W(int obits, WDataOutP outwp); ///< Random reset a signal
extern WDataOutP VL_ZERO_RESET_W(int obits, WDataOutP outwp); ///< Zero reset a signal (slow - else use VL_ZERO_W)
#if VL_THREADED
/// Return high-precision counter for profiling, or 0x0 if not available
inline QData VL_RDTSC_Q() { vluint64_t val; VL_RDTSC(val); return val; }
#endif
/// Math
extern WDataOutP _vl_moddiv_w(int lbits, WDataOutP owp, WDataInP lwp, WDataInP rwp, bool is_modulus);
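
As a usage sketch for the VL_RDTSC_Q() helper added above (the work() call and profile_region() wrapper are hypothetical), a raw cycle delta can bracket a region of interest; on platforms without an implemented counter both reads return 0, so the delta is only meaningful where a counter exists.

// Sketch: cycle-count a region of interest; work() is a hypothetical workload.
extern void work();
inline void profile_region() {
    QData start = VL_RDTSC_Q();
    work();
    QData cycles = VL_RDTSC_Q() - start;  // Stays 0 on platforms without a counter
    VL_PRINTF("region took %" VL_PRI64 "u raw cycles\n", cycles);
}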

View File

@@ -49,25 +49,25 @@ class VerilatedScope;
// Threaded message passing
#ifdef VL_THREADED
/// Message, enqueued on a train, and consumed on the main eval thread
/// Message, enqueued on an mtask, and consumed on the main eval thread
class VerilatedMsg {
public:
// TYPES
struct Cmp {
bool operator() (const VerilatedMsg& a, const VerilatedMsg& b) const {
return a.trainId() < b.trainId(); }
return a.mtaskId() < b.mtaskId(); }
};
private:
// MEMBERS
vluint32_t m_trainId; ///< Train that did enqueue
vluint32_t m_mtaskId; ///< MTask that did enqueue
std::function<void()> m_cb; ///< Lambda to execute when message received
public:
// CONSTRUCTORS
VerilatedMsg(const std::function<void()>& cb)
: m_trainId(Verilated::trainId()), m_cb(cb) {}
: m_mtaskId(Verilated::mtaskId()), m_cb(cb) {}
~VerilatedMsg() {}
// METHODS
vluint32_t trainId() const { return m_trainId; }
vluint32_t mtaskId() const { return m_mtaskId; }
/// Execute the lambda function
void run() const { m_cb(); }
};
@@ -84,7 +84,9 @@ class VerilatedEvalMsgQueue {
VerilatedThreadQueue m_queue VL_GUARDED_BY(m_mutex); ///< Message queue
public:
// CONSTRUCTORS
VerilatedEvalMsgQueue() : m_depth(0) { }
VerilatedEvalMsgQueue() : m_depth(0) {
assert(atomic_is_lock_free(&m_depth));
}
~VerilatedEvalMsgQueue() { }
private:
VL_UNCOPYABLE(VerilatedEvalMsgQueue);
@@ -92,7 +94,6 @@ public:
// METHODS
//// Add message to queue (called by producer)
void post(const VerilatedMsg& msg) VL_EXCLUDES(m_mutex) {
Verilated::endOfEvalReqdInc(); // No mutex, threadsafe
VerilatedLockGuard guard(m_mutex);
m_queue.insert(msg); // Pass by value to copy the message into queue
++m_depth;
@@ -114,10 +115,9 @@ public:
m_queue.erase(it);
m_mutex.unlock();
m_depth--; // Ok if outside critical section as only this code checks the value
Verilated::endOfEvalReqdDec(); // No mutex, threadsafe
{
VL_DEBUG_IF(VL_DBG_MSGF("Executing callback from trainId=%d\n", msg.trainId()););
msg.run();
VL_DEBUG_IF(VL_DBG_MSGF("Executing callback from mtaskId=%d\n", msg.mtaskId()););
msg.run();
}
}
}
@@ -143,8 +143,15 @@ private:
public:
/// Add message to queue, called by producer
static void post(const VerilatedMsg& msg) VL_MT_SAFE {
Verilated::endOfEvalReqdInc();
threadton().m_queue.push(msg); // Pass by value to copy the message into queue
// Handle calls to threaded routines outside
// of any mtask -- if an initial block calls $finish, say.
if (Verilated::mtaskId() == 0) {
// No queueing, just do the action immediately
msg.run();
} else {
Verilated::endOfEvalReqdInc();
threadton().m_queue.push(msg); // Pass by value to copy the message into queue
}
}
/// Push all messages to the eval's queue
static void flush(VerilatedEvalMsgQueue* evalMsgQp) VL_MT_SAFE {
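
To illustrate how the message plumbing in this file is meant to be used, here is a hedged sketch of a worker-side call that defers an action to the main eval thread (example_deferred_print() is illustrative; real call sites live elsewhere in the runtime):

// Sketch: wrap the action in a VerilatedMsg and post it to the calling
// thread's queue; endOfEval() on the main thread later runs the lambda.
// If no mtask is active (mtaskId()==0), post() above runs it immediately.
void example_deferred_print() {
    VerilatedThreadMsgQueue::post(VerilatedMsg(std::function<void()>([]() {
        VL_PRINTF("ran on the eval thread\n");
    })));
}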

View File

@@ -353,6 +353,41 @@ typedef unsigned long long vluint64_t; ///< 64-bit unsigned type
# define VL_ROUND(n) round(n)
#endif
//=========================================================================
// Performance counters
#if VL_THREADED
# if defined(__i386__) || defined(__x86_64__)
/// The vluint64_t argument is loaded with a high-performance counter for profiling
/// or 0x0 if not implemented on this platform
# define VL_RDTSC(val) asm volatile("rdtsc" : "=A" (val))
# elif defined(__aarch64__)
# define VL_RDTSC(val) asm volatile("mrs %[rt],PMCCNTR_EL0" : [rt] "=r" (val));
# else
// We just silently ignore unknown architectures, as this only leads to missing statistics
# define VL_RDTSC(val) (val) = 0;
# endif
#endif
//=========================================================================
// Threading related OS-specific functions
#if VL_THREADED
# if defined(__i386__) || defined(__x86_64__)
/// For more efficient busy waiting on SMT CPUs, let the processor know
/// we're just waiting so it can let another thread run
# define VL_CPU_RELAX() asm volatile("rep; nop" ::: "memory")
# elif defined(__ia64__)
# define VL_CPU_RELAX() asm volatile("hint @pause" ::: "memory")
# elif defined(__aarch64__)
# define VL_CPU_RELAX() asm volatile("yield" ::: "memory")
# elif defined(__powerpc64__)
# define VL_CPU_RELAX() asm volatile("or 1, 1, 1; or 2, 2, 2;" ::: "memory")
# else
# error "Missing VL_CPU_RELAX() definition. Or, don't use VL_THREADED"
# endif
#endif
//=========================================================================
#endif /*guard*/
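
As a usage sketch for VL_CPU_RELAX() (the flag-waiting helper below is illustrative and assumes a threaded build where the macro is defined):

#include <atomic>
#include <thread>

// Sketch: bounded busy-wait for a flag set by another thread. VL_CPU_RELAX()
// keeps the spin polite to a sibling hardware thread; after enough spins we
// fall back to yielding to the OS scheduler.
inline void wait_for_flag(const std::atomic<bool>& ready) {
    for (int spins = 0; !ready.load(std::memory_order_acquire); ++spins) {
        if (spins < 50000) VL_CPU_RELAX();
        else std::this_thread::yield();
    }
}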

View File

@@ -1783,17 +1783,17 @@ void EmitCImp::emitWrapEval(AstNodeModule* modp) {
}
if (v3Global.opt.threads()) { // THREADED-TODO move to per-train
uint32_t trainId = 0;
putsDecoration("// Train "+cvtToStr(trainId)+" start\n");
puts("VL_DEBUG_IF(VL_DBG_MSGF(\"Train starting, trainId="+cvtToStr(trainId)+"\\n\"););\n");
puts("Verilated::trainId("+cvtToStr(trainId)+");\n");
uint32_t mtaskId = 0;
putsDecoration("// MTask "+cvtToStr(mtaskId)+" start\n");
puts("VL_DEBUG_IF(VL_DBG_MSGF(\"MTask starting, mtaskId="+cvtToStr(mtaskId)+"\\n\"););\n");
puts("Verilated::mtaskId("+cvtToStr(mtaskId)+");\n");
}
emitSettleLoop(
(string("VL_DEBUG_IF(VL_DBG_MSGF(\"+ Clock loop\\n\"););\n")
+ (v3Global.opt.trace() ? "vlSymsp->__Vm_activity = true;\n" : "")
+ "_eval(vlSymsp);"), false);
if (v3Global.opt.threads()) { // THREADED-TODO move to end of all trains on thread
puts("Verilated::endOfThreadTrain(vlSymsp->__Vm_evalMsgQp);\n");
if (v3Global.opt.threads()) { // THREADED-TODO move to end of all mtasks on thread
puts("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);\n");
}
if (v3Global.opt.threads()) {
puts("Verilated::endOfEval(vlSymsp->__Vm_evalMsgQp);\n");