forked from github/verilator
Various minor optimizations of VCD trace routines
- Change templated trace routines to branch table. Removed templating from trace chgBus and fullBus and replaced them with a branch table like the others. There is a very small (< 1%) penalty for this on SweRV EH1 CoreMark, but this is less than the variability of disk I/O, so it's worth it to keep the code simpler and smaller. - Prefetch VCD suffix buffer at the top of emit*. - Increase ILP in VCD emit* routines. - Use a 64-bit unaligned store to emit the VCD suffix (on x86 only). The performance difference with these is very small, but the changes hopefully make this code more performance-portable across various micro-architectures.
This commit is contained in:
parent
70549e1a64
commit
b79ef672e1
@ -208,21 +208,31 @@ void VerilatedFst::declDouble(vluint32_t code, const char* name, int dtypenum, f
|
||||
declSymbol(code, name, dtypenum, vardir, vartype, array, arraynum, 2, 64);
|
||||
}
|
||||
|
||||
// Note: emit* are only ever called from one place (full* in
|
||||
// verilated_trace_imp.cpp, which is included in this file at the top),
|
||||
// so always inline them.
|
||||
|
||||
// Emit a value change for a single-bit signal: passes the string "1" when
// newval is non-zero and "0" otherwise to fstWriterEmitValueChange, for the
// entry m_symbolp[code] of the writer m_fst.
VL_ATTR_ALWINLINE
|
||||
void VerilatedFst::emitBit(vluint32_t code, vluint32_t newval) {
|
||||
fstWriterEmitValueChange(m_fst, m_symbolp[code], newval ? "1" : "0");
|
||||
}
|
||||
template <int T_Bits> void VerilatedFst::emitBus(vluint32_t code, vluint32_t newval) {
|
||||
fstWriterEmitValueChange32(m_fst, m_symbolp[code], T_Bits, newval);
|
||||
VL_ATTR_ALWINLINE
|
||||
void VerilatedFst::emitBus(vluint32_t code, vluint32_t newval, int bits) {
|
||||
fstWriterEmitValueChange32(m_fst, m_symbolp[code], bits, newval);
|
||||
}
|
||||
// Emit a value change for a signal carried in a 64-bit value: forwards newval
// and bits unchanged to fstWriterEmitValueChange64 for the entry
// m_symbolp[code]. NOTE(review): bits is presumably the signal width in bits
// -- confirm against the FST writer API.
VL_ATTR_ALWINLINE
|
||||
void VerilatedFst::emitQuad(vluint32_t code, vluint64_t newval, int bits) {
|
||||
fstWriterEmitValueChange64(m_fst, m_symbolp[code], bits, newval);
|
||||
}
|
||||
// Emit a value change for a signal stored as an array of 32-bit words:
// forwards the word pointer newvalp and bits unchanged to
// fstWriterEmitValueChangeVec32 for the entry m_symbolp[code].
// NOTE(review): word ordering expected by the writer is not visible here --
// confirm against the FST writer API.
VL_ATTR_ALWINLINE
|
||||
void VerilatedFst::emitArray(vluint32_t code, const vluint32_t* newvalp, int bits) {
|
||||
fstWriterEmitValueChangeVec32(m_fst, m_symbolp[code], bits, newvalp);
|
||||
}
|
||||
// Emit a value change for a float signal: passes the address of the by-value
// parameter newval to fstWriterEmitValueChange for the entry m_symbolp[code].
// NOTE(review): the writer presumably reads the raw bytes of the value through
// this pointer during the call -- confirm against the FST writer API.
VL_ATTR_ALWINLINE
|
||||
void VerilatedFst::emitFloat(vluint32_t code, float newval) {
|
||||
fstWriterEmitValueChange(m_fst, m_symbolp[code], &newval);
|
||||
}
|
||||
// Emit a value change for a double signal: passes the address of the by-value
// parameter newval to fstWriterEmitValueChange for the entry m_symbolp[code].
// NOTE(review): the writer presumably reads the raw bytes of the value through
// this pointer during the call -- confirm against the FST writer API.
VL_ATTR_ALWINLINE
|
||||
void VerilatedFst::emitDouble(vluint32_t code, double newval) {
|
||||
fstWriterEmitValueChange(m_fst, m_symbolp[code], &newval);
|
||||
}
|
||||
|
@ -67,13 +67,14 @@ protected:
|
||||
bool preFullDump() VL_OVERRIDE { return isOpen(); }
|
||||
bool preChangeDump() VL_OVERRIDE { return isOpen(); }
|
||||
|
||||
// Implementations of duck-typed methods for VerilatedTrace
|
||||
void emitBit(vluint32_t code, vluint32_t newval);
|
||||
template <int T_Bits> void emitBus(vluint32_t code, vluint32_t newval);
|
||||
void emitQuad(vluint32_t code, vluint64_t newval, int bits);
|
||||
void emitArray(vluint32_t code, const vluint32_t* newvalp, int bits);
|
||||
void emitFloat(vluint32_t code, float newval);
|
||||
void emitDouble(vluint32_t code, double newval);
|
||||
// Implementations of duck-typed methods for VerilatedTrace. These are
|
||||
// called from only one place (namely full*) so always inline them.
|
||||
inline void emitBit(vluint32_t code, vluint32_t newval);
|
||||
inline void emitBus(vluint32_t code, vluint32_t newval, int bits);
|
||||
inline void emitQuad(vluint32_t code, vluint64_t newval, int bits);
|
||||
inline void emitArray(vluint32_t code, const vluint32_t* newvalp, int bits);
|
||||
inline void emitFloat(vluint32_t code, float newval);
|
||||
inline void emitDouble(vluint32_t code, double newval);
|
||||
|
||||
public:
|
||||
//=========================================================================
|
||||
|
@ -262,7 +262,7 @@ public:
|
||||
// this is very hot code during tracing.
|
||||
|
||||
// duck-typed void emitBit(vluint32_t code, vluint32_t newval) = 0;
|
||||
// duck-typed template <int T_Bits> void emitBus(vluint32_t code, vluint32_t newval) = 0;
|
||||
// duck-typed void emitBus(vluint32_t code, vluint32_t newval, int bits) = 0;
|
||||
// duck-typed void emitQuad(vluint32_t code, vluint64_t newval, int bits) = 0;
|
||||
// duck-typed void emitArray(vluint32_t code, const vluint32_t* newvalp, int bits) = 0;
|
||||
// duck-typed void emitFloat(vluint32_t code, float newval) = 0;
|
||||
@ -272,7 +272,7 @@ public:
|
||||
|
||||
// Write to previous value buffer value and emit trace entry.
|
||||
void fullBit(vluint32_t* oldp, vluint32_t newval);
|
||||
template <int T_Bits> void fullBus(vluint32_t* oldp, vluint32_t newval);
|
||||
void fullBus(vluint32_t* oldp, vluint32_t newval, int bits);
|
||||
void fullQuad(vluint32_t* oldp, vluint64_t newval, int bits);
|
||||
void fullArray(vluint32_t* oldp, const vluint32_t* newvalp, int bits);
|
||||
void fullFloat(vluint32_t* oldp, float newval);
|
||||
@ -286,8 +286,8 @@ public:
|
||||
m_traceBufferWritep += 2;
|
||||
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
|
||||
}
|
||||
template <int T_Bits> inline void chgBus(vluint32_t* oldp, vluint32_t newval) {
|
||||
m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_BUS | T_Bits;
|
||||
inline void chgBus(vluint32_t* oldp, vluint32_t newval, int bits) {
|
||||
m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_BUS | bits;
|
||||
m_traceBufferWritep[1].oldp = oldp;
|
||||
m_traceBufferWritep[2].newBits = newval;
|
||||
m_traceBufferWritep += 3;
|
||||
@ -339,9 +339,9 @@ public:
|
||||
const vluint32_t diff = *oldp ^ newval;
|
||||
if (VL_UNLIKELY(diff)) fullBit(oldp, newval);
|
||||
}
|
||||
template <int T_Bits> inline void CHG(Bus)(vluint32_t* oldp, vluint32_t newval) {
|
||||
inline void CHG(Bus)(vluint32_t* oldp, vluint32_t newval, int bits) {
|
||||
const vluint32_t diff = *oldp ^ newval;
|
||||
if (VL_UNLIKELY(diff)) fullBus<T_Bits>(oldp, newval);
|
||||
if (VL_UNLIKELY(diff)) fullBus(oldp, newval, bits);
|
||||
}
|
||||
inline void CHG(Quad)(vluint32_t* oldp, vluint64_t newval, int bits) {
|
||||
const vluint64_t diff = *reinterpret_cast<vluint64_t*>(oldp) ^ newval;
|
||||
|
@ -161,50 +161,15 @@ template <> void VerilatedTrace<VL_DERIVED_T>::workerThreadMain() {
|
||||
continue;
|
||||
case VerilatedTraceCommand::CHG_BUS:
|
||||
VL_TRACE_THREAD_DEBUG("Command CHG_BUS");
|
||||
|
||||
oldp = (readp++)->oldp;
|
||||
newBits = (readp++)->newBits;
|
||||
|
||||
// Bits stored in bottom byte of command
|
||||
switch (cmd & 0xFFU) {
|
||||
case 2: chgBusImpl<2>(oldp, newBits); continue;
|
||||
case 3: chgBusImpl<3>(oldp, newBits); continue;
|
||||
case 4: chgBusImpl<4>(oldp, newBits); continue;
|
||||
case 5: chgBusImpl<5>(oldp, newBits); continue;
|
||||
case 6: chgBusImpl<6>(oldp, newBits); continue;
|
||||
case 7: chgBusImpl<7>(oldp, newBits); continue;
|
||||
case 8: chgBusImpl<8>(oldp, newBits); continue;
|
||||
case 9: chgBusImpl<9>(oldp, newBits); continue;
|
||||
case 10: chgBusImpl<10>(oldp, newBits); continue;
|
||||
case 11: chgBusImpl<11>(oldp, newBits); continue;
|
||||
case 12: chgBusImpl<12>(oldp, newBits); continue;
|
||||
case 13: chgBusImpl<13>(oldp, newBits); continue;
|
||||
case 14: chgBusImpl<14>(oldp, newBits); continue;
|
||||
case 15: chgBusImpl<15>(oldp, newBits); continue;
|
||||
case 16: chgBusImpl<16>(oldp, newBits); continue;
|
||||
case 17: chgBusImpl<17>(oldp, newBits); continue;
|
||||
case 18: chgBusImpl<18>(oldp, newBits); continue;
|
||||
case 19: chgBusImpl<19>(oldp, newBits); continue;
|
||||
case 20: chgBusImpl<20>(oldp, newBits); continue;
|
||||
case 21: chgBusImpl<21>(oldp, newBits); continue;
|
||||
case 22: chgBusImpl<22>(oldp, newBits); continue;
|
||||
case 23: chgBusImpl<23>(oldp, newBits); continue;
|
||||
case 24: chgBusImpl<24>(oldp, newBits); continue;
|
||||
case 25: chgBusImpl<25>(oldp, newBits); continue;
|
||||
case 26: chgBusImpl<26>(oldp, newBits); continue;
|
||||
case 27: chgBusImpl<27>(oldp, newBits); continue;
|
||||
case 28: chgBusImpl<28>(oldp, newBits); continue;
|
||||
case 29: chgBusImpl<29>(oldp, newBits); continue;
|
||||
case 30: chgBusImpl<30>(oldp, newBits); continue;
|
||||
case 31: chgBusImpl<31>(oldp, newBits); continue;
|
||||
case 32: chgBusImpl<32>(oldp, newBits); continue;
|
||||
}
|
||||
VL_FATAL_MT(__FILE__, __LINE__, "", "Bad number of bits in CHG_BUS command");
|
||||
break;
|
||||
chgBusImpl(readp[0].oldp, readp[1].newBits, cmd & 0xFFULL);
|
||||
readp += 2;
|
||||
VL_TRACE_THREAD_DEBUG("Command CHG_BUS DONE");
|
||||
continue;
|
||||
case VerilatedTraceCommand::CHG_QUAD:
|
||||
VL_TRACE_THREAD_DEBUG("Command CHG_QUAD");
|
||||
// Bits stored in bottom byte of command
|
||||
chgQuadImpl(readp[0].oldp, readp[1].newBits, cmd & 0xFF);
|
||||
chgQuadImpl(readp[0].oldp, readp[1].newBits, cmd & 0xFFULL);
|
||||
readp += 2;
|
||||
continue;
|
||||
case VerilatedTraceCommand::CHG_ARRAY:
|
||||
@ -516,49 +481,12 @@ template <> void VerilatedTrace<VL_DERIVED_T>::fullBit(vluint32_t* oldp, vluint3
|
||||
self()->emitBit(oldp - m_sigs_oldvalp, newval);
|
||||
}
|
||||
|
||||
// We want these functions specialized for sizes to avoid hard to predict
|
||||
// branches, but we don't want them inlined, so we explicitly instantiate the
|
||||
// template for each size used by Verilator.
|
||||
template <>
|
||||
template <int T_Bits>
|
||||
void VerilatedTrace<VL_DERIVED_T>::fullBus(vluint32_t* oldp, vluint32_t newval) {
|
||||
void VerilatedTrace<VL_DERIVED_T>::fullBus(vluint32_t* oldp, vluint32_t newval, int bits) {
|
||||
*oldp = newval;
|
||||
self()->emitBus<T_Bits>(oldp - m_sigs_oldvalp, newval);
|
||||
self()->emitBus(oldp - m_sigs_oldvalp, newval, bits);
|
||||
}
|
||||
|
||||
// Note: No specialization for width 1, covered by 'fullBit'
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<2>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<3>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<4>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<5>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<6>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<7>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<8>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<9>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<10>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<11>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<12>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<13>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<14>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<15>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<16>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<17>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<18>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<19>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<20>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<21>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<22>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<23>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<24>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<25>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<26>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<27>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<28>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<29>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<30>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<31>(vluint32_t* oldp, vluint32_t newval);
|
||||
template void VerilatedTrace<VL_DERIVED_T>::fullBus<32>(vluint32_t* oldp, vluint32_t newval);
|
||||
|
||||
template <>
|
||||
void VerilatedTrace<VL_DERIVED_T>::fullQuad(vluint32_t* oldp, vluint64_t newval, int bits) {
|
||||
*reinterpret_cast<vluint64_t*>(oldp) = newval;
|
||||
|
@ -611,15 +611,20 @@ void VerilatedVcd::declTriArray(vluint32_t code, const char* name, bool array, i
|
||||
//=============================================================================
|
||||
// Emit trace entries
|
||||
|
||||
#define VL_VCD_SUFFIXP(code) (m_suffixesp + (code)*VL_TRACE_SUFFIX_ENTRY_SIZE)
|
||||
|
||||
// Emit suffix, write back write pointer, check buffer
|
||||
void VerilatedVcd::finishLine(vluint32_t code, char* writep) {
|
||||
const char* const suffixp = m_suffixesp + code * VL_TRACE_SUFFIX_ENTRY_SIZE;
|
||||
const char* const suffixp = VL_VCD_SUFFIXP(code);
|
||||
// Copy the whole suffix (this avoids having hard to predict branches which
|
||||
// helps a lot). Note suffixp could be aligned, so could load it in one go,
|
||||
// but then we would be endianness dependent which we don't have a way to
|
||||
// test right now and probably would make little difference...
|
||||
// Note: The maximum length of the suffix is
|
||||
// helps a lot). Note: The maximum length of the suffix is
|
||||
// VL_TRACE_MAX_VCD_CODE_SIZE + 2 == 7, but we unroll this here for speed.
|
||||
#ifdef __x86_64__
|
||||
// Copy the whole 8 bytes in one go, this works on little-endian machines
|
||||
// supporting unaligned stores.
|
||||
*reinterpret_cast<vluint64_t*>(writep) = *reinterpret_cast<const vluint64_t*>(suffixp);
|
||||
#else
|
||||
// Portable variant
|
||||
writep[0] = suffixp[0];
|
||||
writep[1] = suffixp[1];
|
||||
writep[2] = suffixp[2];
|
||||
@ -627,139 +632,202 @@ void VerilatedVcd::finishLine(vluint32_t code, char* writep) {
|
||||
writep[4] = suffixp[4];
|
||||
writep[5] = suffixp[5];
|
||||
writep[6] = '\n'; // The 6th index is always '\n' if it's relevant, no need to fetch it.
|
||||
#endif
|
||||
// Now write back the write pointer incremented by the actual size of the
|
||||
// suffix, which was stored in the last byte of the suffix buffer entry.
|
||||
m_writep = writep + suffixp[VL_TRACE_SUFFIX_ENTRY_SIZE - 1];
|
||||
bufferCheck();
|
||||
}
|
||||
|
||||
// Note: emit* are only ever called from one place (full* in
|
||||
// verilated_trace_imp.cpp, which is included in this file at the top),
|
||||
// so always inline them.
|
||||
|
||||
// Emit a single-bit value: writes one character, '0' OR-ed with newval, at
// the current write pointer m_writep, then hands the advanced pointer to
// finishLine together with code.
// NOTE(review): assumes newval is 0 or 1 so the character written is '0' or
// '1' -- confirm against callers.
VL_ATTR_ALWINLINE
|
||||
void VerilatedVcd::emitBit(vluint32_t code, vluint32_t newval) {
|
||||
// Don't prefetch suffix as it's a bit too late;
|
||||
// Work on a local copy of the write pointer; finishLine receives it below.
char* wp = m_writep;
|
||||
*wp++ = '0' | static_cast<char>(newval);
|
||||
finishLine(code, wp);
|
||||
}
|
||||
|
||||
template <int T_Bits> void VerilatedVcd::emitBus(vluint32_t code, vluint32_t newval) {
|
||||
VL_ATTR_ALWINLINE
|
||||
void VerilatedVcd::emitBus(vluint32_t code, vluint32_t newval, int bits) {
|
||||
VL_PREFETCH_RD(VL_VCD_SUFFIXP(code));
|
||||
char* wp = m_writep;
|
||||
*wp++ = 'b';
|
||||
newval <<= 32 - T_Bits;
|
||||
int bits = T_Bits;
|
||||
do {
|
||||
*wp++ = '0' | static_cast<char>(newval >> 31);
|
||||
newval <<= 1;
|
||||
} while (--bits);
|
||||
wp += bits;
|
||||
// clang-format off
|
||||
switch (bits) {
|
||||
case 32: wp[-32] = '0' | static_cast<char>((newval >> 31) ); //FALLTHRU
|
||||
case 31: wp[-31] = '0' | static_cast<char>((newval >> 30) & 1); //FALLTHRU
|
||||
case 30: wp[-30] = '0' | static_cast<char>((newval >> 29) & 1); //FALLTHRU
|
||||
case 29: wp[-29] = '0' | static_cast<char>((newval >> 28) & 1); //FALLTHRU
|
||||
case 28: wp[-28] = '0' | static_cast<char>((newval >> 27) & 1); //FALLTHRU
|
||||
case 27: wp[-27] = '0' | static_cast<char>((newval >> 26) & 1); //FALLTHRU
|
||||
case 26: wp[-26] = '0' | static_cast<char>((newval >> 25) & 1); //FALLTHRU
|
||||
case 25: wp[-25] = '0' | static_cast<char>((newval >> 24) & 1); //FALLTHRU
|
||||
case 24: wp[-24] = '0' | static_cast<char>((newval >> 23) & 1); //FALLTHRU
|
||||
case 23: wp[-23] = '0' | static_cast<char>((newval >> 22) & 1); //FALLTHRU
|
||||
case 22: wp[-22] = '0' | static_cast<char>((newval >> 21) & 1); //FALLTHRU
|
||||
case 21: wp[-21] = '0' | static_cast<char>((newval >> 20) & 1); //FALLTHRU
|
||||
case 20: wp[-20] = '0' | static_cast<char>((newval >> 19) & 1); //FALLTHRU
|
||||
case 19: wp[-19] = '0' | static_cast<char>((newval >> 18) & 1); //FALLTHRU
|
||||
case 18: wp[-18] = '0' | static_cast<char>((newval >> 17) & 1); //FALLTHRU
|
||||
case 17: wp[-17] = '0' | static_cast<char>((newval >> 16) & 1); //FALLTHRU
|
||||
case 16: wp[-16] = '0' | static_cast<char>((newval >> 15) & 1); //FALLTHRU
|
||||
case 15: wp[-15] = '0' | static_cast<char>((newval >> 14) & 1); //FALLTHRU
|
||||
case 14: wp[-14] = '0' | static_cast<char>((newval >> 13) & 1); //FALLTHRU
|
||||
case 13: wp[-13] = '0' | static_cast<char>((newval >> 12) & 1); //FALLTHRU
|
||||
case 12: wp[-12] = '0' | static_cast<char>((newval >> 11) & 1); //FALLTHRU
|
||||
case 11: wp[-11] = '0' | static_cast<char>((newval >> 10) & 1); //FALLTHRU
|
||||
case 10: wp[-10] = '0' | static_cast<char>((newval >> 9) & 1); //FALLTHRU
|
||||
case 9: wp[ -9] = '0' | static_cast<char>((newval >> 8) & 1); //FALLTHRU
|
||||
case 8: wp[ -8] = '0' | static_cast<char>((newval >> 7) & 1); //FALLTHRU
|
||||
case 7: wp[ -7] = '0' | static_cast<char>((newval >> 6) & 1); //FALLTHRU
|
||||
case 6: wp[ -6] = '0' | static_cast<char>((newval >> 5) & 1); //FALLTHRU
|
||||
case 5: wp[ -5] = '0' | static_cast<char>((newval >> 4) & 1); //FALLTHRU
|
||||
case 4: wp[ -4] = '0' | static_cast<char>((newval >> 3) & 1); //FALLTHRU
|
||||
case 3: wp[ -3] = '0' | static_cast<char>((newval >> 2) & 1); //FALLTHRU
|
||||
case 2: wp[ -2] = '0' | static_cast<char>((newval >> 1) & 1); //FALLTHRU
|
||||
/*bit*/ wp[ -1] = '0' | static_cast<char>((newval ) & 1); //FALLTHRU
|
||||
}
|
||||
// clang-format on
|
||||
finishLine(code, wp);
|
||||
}
|
||||
|
||||
VL_ATTR_ALWINLINE
|
||||
void VerilatedVcd::emitQuad(vluint32_t code, vluint64_t newval, int bits) {
|
||||
VL_PREFETCH_RD(VL_VCD_SUFFIXP(code));
|
||||
char* wp = m_writep;
|
||||
*wp++ = 'b';
|
||||
newval <<= 64 - bits;
|
||||
// Handle the top 32 bits within the 64 bit input
|
||||
const int bitsInTopHalf = bits - 32;
|
||||
wp += bitsInTopHalf;
|
||||
// clang-format off
|
||||
switch (bitsInTopHalf) {
|
||||
case 32: wp[-32] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 31: wp[-31] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 30: wp[-30] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 29: wp[-29] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 28: wp[-28] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 27: wp[-27] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 26: wp[-26] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 25: wp[-25] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 24: wp[-24] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 23: wp[-23] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 22: wp[-22] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 21: wp[-21] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 20: wp[-20] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 19: wp[-19] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 18: wp[-18] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 17: wp[-17] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 16: wp[-16] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 15: wp[-15] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 14: wp[-14] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 13: wp[-13] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 12: wp[-12] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 11: wp[-11] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 10: wp[-10] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 9: wp[ -9] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 8: wp[ -8] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 7: wp[ -7] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 6: wp[ -6] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 5: wp[ -5] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 4: wp[ -4] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 3: wp[ -3] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 2: wp[ -2] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 1: wp[ -1] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
|
||||
case 32: wp[-32] = '0' | static_cast<char>((newval >> 63) ); //FALLTHRU
|
||||
case 31: wp[-31] = '0' | static_cast<char>((newval >> 62) & 1); //FALLTHRU
|
||||
case 30: wp[-30] = '0' | static_cast<char>((newval >> 61) & 1); //FALLTHRU
|
||||
case 29: wp[-29] = '0' | static_cast<char>((newval >> 60) & 1); //FALLTHRU
|
||||
case 28: wp[-28] = '0' | static_cast<char>((newval >> 59) & 1); //FALLTHRU
|
||||
case 27: wp[-27] = '0' | static_cast<char>((newval >> 58) & 1); //FALLTHRU
|
||||
case 26: wp[-26] = '0' | static_cast<char>((newval >> 57) & 1); //FALLTHRU
|
||||
case 25: wp[-25] = '0' | static_cast<char>((newval >> 56) & 1); //FALLTHRU
|
||||
case 24: wp[-24] = '0' | static_cast<char>((newval >> 55) & 1); //FALLTHRU
|
||||
case 23: wp[-23] = '0' | static_cast<char>((newval >> 54) & 1); //FALLTHRU
|
||||
case 22: wp[-22] = '0' | static_cast<char>((newval >> 53) & 1); //FALLTHRU
|
||||
case 21: wp[-21] = '0' | static_cast<char>((newval >> 52) & 1); //FALLTHRU
|
||||
case 20: wp[-20] = '0' | static_cast<char>((newval >> 51) & 1); //FALLTHRU
|
||||
case 19: wp[-19] = '0' | static_cast<char>((newval >> 50) & 1); //FALLTHRU
|
||||
case 18: wp[-18] = '0' | static_cast<char>((newval >> 49) & 1); //FALLTHRU
|
||||
case 17: wp[-17] = '0' | static_cast<char>((newval >> 48) & 1); //FALLTHRU
|
||||
case 16: wp[-16] = '0' | static_cast<char>((newval >> 47) & 1); //FALLTHRU
|
||||
case 15: wp[-15] = '0' | static_cast<char>((newval >> 46) & 1); //FALLTHRU
|
||||
case 14: wp[-14] = '0' | static_cast<char>((newval >> 45) & 1); //FALLTHRU
|
||||
case 13: wp[-13] = '0' | static_cast<char>((newval >> 44) & 1); //FALLTHRU
|
||||
case 12: wp[-12] = '0' | static_cast<char>((newval >> 43) & 1); //FALLTHRU
|
||||
case 11: wp[-11] = '0' | static_cast<char>((newval >> 42) & 1); //FALLTHRU
|
||||
case 10: wp[-10] = '0' | static_cast<char>((newval >> 41) & 1); //FALLTHRU
|
||||
case 9: wp[ -9] = '0' | static_cast<char>((newval >> 40) & 1); //FALLTHRU
|
||||
case 8: wp[ -8] = '0' | static_cast<char>((newval >> 39) & 1); //FALLTHRU
|
||||
case 7: wp[ -7] = '0' | static_cast<char>((newval >> 38) & 1); //FALLTHRU
|
||||
case 6: wp[ -6] = '0' | static_cast<char>((newval >> 37) & 1); //FALLTHRU
|
||||
case 5: wp[ -5] = '0' | static_cast<char>((newval >> 36) & 1); //FALLTHRU
|
||||
case 4: wp[ -4] = '0' | static_cast<char>((newval >> 35) & 1); //FALLTHRU
|
||||
case 3: wp[ -3] = '0' | static_cast<char>((newval >> 34) & 1); //FALLTHRU
|
||||
case 2: wp[ -2] = '0' | static_cast<char>((newval >> 33) & 1); //FALLTHRU
|
||||
case 1: wp[ -1] = '0' | static_cast<char>((newval >> 32) & 1); //FALLTHRU
|
||||
}
|
||||
// clang-format on
|
||||
// Handle the bottom 32 bits within the 64 bit input
|
||||
int remaining = 32;
|
||||
vluint32_t val = static_cast<vluint32_t>(newval); // Truncate to bottom 32 bits
|
||||
int loops = 4;
|
||||
do {
|
||||
*wp++ = '0' | static_cast<char>(newval >> 63);
|
||||
newval <<= 1;
|
||||
} while (--remaining);
|
||||
wp[0] = '0' | static_cast<char>((val >> 31));
|
||||
wp[1] = '0' | static_cast<char>((val >> 30) & 1);
|
||||
wp[2] = '0' | static_cast<char>((val >> 29) & 1);
|
||||
wp[3] = '0' | static_cast<char>((val >> 28) & 1);
|
||||
wp[4] = '0' | static_cast<char>((val >> 27) & 1);
|
||||
wp[5] = '0' | static_cast<char>((val >> 26) & 1);
|
||||
wp[6] = '0' | static_cast<char>((val >> 25) & 1);
|
||||
wp[7] = '0' | static_cast<char>((val >> 24) & 1);
|
||||
wp += 8;
|
||||
val <<= 8;
|
||||
} while (--loops);
|
||||
|
||||
finishLine(code, wp);
|
||||
}
|
||||
|
||||
VL_ATTR_ALWINLINE
|
||||
void VerilatedVcd::emitArray(vluint32_t code, const vluint32_t* newvalp, int bits) {
|
||||
VL_PREFETCH_RD(VL_VCD_SUFFIXP(code));
|
||||
int words = (bits + 31) / 32;
|
||||
char* wp = m_writep;
|
||||
*wp++ = 'b';
|
||||
// Handle the most significant word
|
||||
vluint32_t val = newvalp[--words];
|
||||
const int bitsInMSW = bits % 32 == 0 ? 32 : bits % 32;
|
||||
vluint32_t val = newvalp[--words] << (32 - bitsInMSW);
|
||||
wp += bitsInMSW;
|
||||
// clang-format off
|
||||
switch (bitsInMSW) {
|
||||
case 32: wp[-32] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 31: wp[-31] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 30: wp[-30] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 29: wp[-29] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 28: wp[-28] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 27: wp[-27] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 26: wp[-26] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 25: wp[-25] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 24: wp[-24] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 23: wp[-23] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 22: wp[-22] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 21: wp[-21] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 20: wp[-20] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 19: wp[-19] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 18: wp[-18] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 17: wp[-17] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 16: wp[-16] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 15: wp[-15] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 14: wp[-14] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 13: wp[-13] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 12: wp[-12] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 11: wp[-11] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 10: wp[-10] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 9: wp[ -9] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 8: wp[ -8] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 7: wp[ -7] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 6: wp[ -6] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 5: wp[ -5] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 4: wp[ -4] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 3: wp[ -3] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 2: wp[ -2] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 1: wp[ -1] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
|
||||
case 32: wp[-32] = '0' | static_cast<char>((val >> 31) ); //FALLTHRU
|
||||
case 31: wp[-31] = '0' | static_cast<char>((val >> 30) & 1); //FALLTHRU
|
||||
case 30: wp[-30] = '0' | static_cast<char>((val >> 29) & 1); //FALLTHRU
|
||||
case 29: wp[-29] = '0' | static_cast<char>((val >> 28) & 1); //FALLTHRU
|
||||
case 28: wp[-28] = '0' | static_cast<char>((val >> 27) & 1); //FALLTHRU
|
||||
case 27: wp[-27] = '0' | static_cast<char>((val >> 26) & 1); //FALLTHRU
|
||||
case 26: wp[-26] = '0' | static_cast<char>((val >> 25) & 1); //FALLTHRU
|
||||
case 25: wp[-25] = '0' | static_cast<char>((val >> 24) & 1); //FALLTHRU
|
||||
case 24: wp[-24] = '0' | static_cast<char>((val >> 23) & 1); //FALLTHRU
|
||||
case 23: wp[-23] = '0' | static_cast<char>((val >> 22) & 1); //FALLTHRU
|
||||
case 22: wp[-22] = '0' | static_cast<char>((val >> 21) & 1); //FALLTHRU
|
||||
case 21: wp[-21] = '0' | static_cast<char>((val >> 20) & 1); //FALLTHRU
|
||||
case 20: wp[-20] = '0' | static_cast<char>((val >> 19) & 1); //FALLTHRU
|
||||
case 19: wp[-19] = '0' | static_cast<char>((val >> 18) & 1); //FALLTHRU
|
||||
case 18: wp[-18] = '0' | static_cast<char>((val >> 17) & 1); //FALLTHRU
|
||||
case 17: wp[-17] = '0' | static_cast<char>((val >> 16) & 1); //FALLTHRU
|
||||
case 16: wp[-16] = '0' | static_cast<char>((val >> 15) & 1); //FALLTHRU
|
||||
case 15: wp[-15] = '0' | static_cast<char>((val >> 14) & 1); //FALLTHRU
|
||||
case 14: wp[-14] = '0' | static_cast<char>((val >> 13) & 1); //FALLTHRU
|
||||
case 13: wp[-13] = '0' | static_cast<char>((val >> 12) & 1); //FALLTHRU
|
||||
case 12: wp[-12] = '0' | static_cast<char>((val >> 11) & 1); //FALLTHRU
|
||||
case 11: wp[-11] = '0' | static_cast<char>((val >> 10) & 1); //FALLTHRU
|
||||
case 10: wp[-10] = '0' | static_cast<char>((val >> 9) & 1); //FALLTHRU
|
||||
case 9: wp[ -9] = '0' | static_cast<char>((val >> 8) & 1); //FALLTHRU
|
||||
case 8: wp[ -8] = '0' | static_cast<char>((val >> 7) & 1); //FALLTHRU
|
||||
case 7: wp[ -7] = '0' | static_cast<char>((val >> 6) & 1); //FALLTHRU
|
||||
case 6: wp[ -6] = '0' | static_cast<char>((val >> 5) & 1); //FALLTHRU
|
||||
case 5: wp[ -5] = '0' | static_cast<char>((val >> 4) & 1); //FALLTHRU
|
||||
case 4: wp[ -4] = '0' | static_cast<char>((val >> 3) & 1); //FALLTHRU
|
||||
case 3: wp[ -3] = '0' | static_cast<char>((val >> 2) & 1); //FALLTHRU
|
||||
case 2: wp[ -2] = '0' | static_cast<char>((val >> 1) & 1); //FALLTHRU
|
||||
case 1: wp[ -1] = '0' | static_cast<char>((val ) & 1); //FALLTHRU
|
||||
}
|
||||
// clang-format on
|
||||
// Handle the remaining words
|
||||
while (words > 0) {
|
||||
vluint32_t val = newvalp[--words];
|
||||
int bits = 32;
|
||||
int loops = 4;
|
||||
do {
|
||||
*wp++ = '0' | static_cast<char>(val >> 31);
|
||||
val <<= 1;
|
||||
} while (--bits);
|
||||
wp[0] = '0' | static_cast<char>((val >> 31));
|
||||
wp[1] = '0' | static_cast<char>((val >> 30) & 1);
|
||||
wp[2] = '0' | static_cast<char>((val >> 29) & 1);
|
||||
wp[3] = '0' | static_cast<char>((val >> 28) & 1);
|
||||
wp[4] = '0' | static_cast<char>((val >> 27) & 1);
|
||||
wp[5] = '0' | static_cast<char>((val >> 26) & 1);
|
||||
wp[6] = '0' | static_cast<char>((val >> 25) & 1);
|
||||
wp[7] = '0' | static_cast<char>((val >> 24) & 1);
|
||||
wp += 8;
|
||||
val <<= 8;
|
||||
} while (--loops);
|
||||
}
|
||||
finishLine(code, wp);
|
||||
}
|
||||
|
||||
VL_ATTR_ALWINLINE
|
||||
void VerilatedVcd::emitFloat(vluint32_t code, float newval) {
|
||||
VL_PREFETCH_RD(VL_VCD_SUFFIXP(code));
|
||||
char* wp = m_writep;
|
||||
// Buffer can't overflow before sprintf; we sized during declaration
|
||||
sprintf(wp, "r%.16g", static_cast<double>(newval));
|
||||
@ -767,7 +835,9 @@ void VerilatedVcd::emitFloat(vluint32_t code, float newval) {
|
||||
finishLine(code, wp);
|
||||
}
|
||||
|
||||
VL_ATTR_ALWINLINE
|
||||
void VerilatedVcd::emitDouble(vluint32_t code, double newval) {
|
||||
VL_PREFETCH_RD(VL_VCD_SUFFIXP(code));
|
||||
char* wp = m_writep;
|
||||
// Buffer can't overflow before sprintf; we sized during declaration
|
||||
sprintf(wp, "r%.16g", newval);
|
||||
@ -775,6 +845,8 @@ void VerilatedVcd::emitDouble(vluint32_t code, double newval) {
|
||||
finishLine(code, wp);
|
||||
}
|
||||
|
||||
#undef VL_VCD_SUFFIXP
|
||||
|
||||
#ifdef VL_TRACE_VCD_OLD_API
|
||||
|
||||
void VerilatedVcd::fullBit(vluint32_t code, const vluint32_t newval) {
|
||||
|
@ -122,13 +122,14 @@ protected:
|
||||
bool preFullDump() VL_OVERRIDE { return isOpen(); }
|
||||
bool preChangeDump() VL_OVERRIDE;
|
||||
|
||||
// Implementations of duck-typed methods for VerilatedTrace
|
||||
void emitBit(vluint32_t code, vluint32_t newval);
|
||||
template <int T_Bits> void emitBus(vluint32_t code, vluint32_t newval);
|
||||
void emitQuad(vluint32_t code, vluint64_t newval, int bits);
|
||||
void emitArray(vluint32_t code, const vluint32_t* newvalp, int bits);
|
||||
void emitFloat(vluint32_t code, float newval);
|
||||
void emitDouble(vluint32_t code, double newval);
|
||||
// Implementations of duck-typed methods for VerilatedTrace. These are
|
||||
// called from only one place (namely full*) so always inline them.
|
||||
inline void emitBit(vluint32_t code, vluint32_t newval);
|
||||
inline void emitBus(vluint32_t code, vluint32_t newval, int bits);
|
||||
inline void emitQuad(vluint32_t code, vluint64_t newval, int bits);
|
||||
inline void emitArray(vluint32_t code, const vluint32_t* newvalp, int bits);
|
||||
inline void emitFloat(vluint32_t code, float newval);
|
||||
inline void emitDouble(vluint32_t code, double newval);
|
||||
|
||||
public:
|
||||
//=========================================================================
|
||||
@ -179,8 +180,8 @@ public:
|
||||
// Write back to previous value buffer value and emit
|
||||
|
||||
void fullBit(vluint32_t* oldp, vluint32_t newval) { fullBit(oldp - this->oldp(0), newval); }
|
||||
template <int T_Bits> void fullBus(vluint32_t* oldp, vluint32_t newval) {
|
||||
fullBus(oldp - this->oldp(0), newval, T_Bits);
|
||||
void fullBus(vluint32_t* oldp, vluint32_t newval, int bits) {
|
||||
fullBus(oldp - this->oldp(0), newval, bits);
|
||||
}
|
||||
void fullQuad(vluint32_t* oldp, vluint64_t newval, int bits) {
|
||||
fullQuad(oldp - this->oldp(0), newval, bits);
|
||||
@ -195,8 +196,8 @@ public:
|
||||
// Check previous value and emit if changed
|
||||
|
||||
void chgBit(vluint32_t* oldp, vluint32_t newval) { chgBit(oldp - this->oldp(0), newval); }
|
||||
template <int T_Bits> void chgBus(vluint32_t* oldp, vluint32_t newval) {
|
||||
chgBus(oldp - this->oldp(0), newval, T_Bits);
|
||||
void chgBus(vluint32_t* oldp, vluint32_t newval, int bits) {
|
||||
chgBus(oldp - this->oldp(0), newval, bits);
|
||||
}
|
||||
void chgQuad(vluint32_t* oldp, vluint64_t newval, int bits) {
|
||||
chgQuad(oldp - this->oldp(0), newval, bits);
|
||||
|
@ -3561,7 +3561,8 @@ class EmitCTrace : EmitCStmts {
|
||||
puts("vcdp->" + full + "Quad");
|
||||
emitWidth = true;
|
||||
} else if (nodep->declp()->widthMin() > 1) {
|
||||
puts("vcdp->" + full + "Bus<" + cvtToStr(nodep->declp()->widthMin()) + ">");
|
||||
puts("vcdp->" + full + "Bus");
|
||||
emitWidth = true;
|
||||
} else {
|
||||
puts("vcdp->" + full + "Bit");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user