Various minor optimizations of VCD trace routines

- Change templated trace routines to branch table.

Removed templating from trace chgBus and fullBus and replaced them with
a branch table like the other routines. There is a very small (< 1%)
penalty for this on SweRV EH1 CoreMark, but this is less than the
variability of disk IO, so it's worth it to keep the code simpler and smaller.

- Prefetch VCD suffix buffer at the top of emit*

- Increase ILP in VCD emit* routines

- Use a 64-bit unaligned store to emit the VCD suffix (on x86 only)

The performance difference with these is very small, but the changes
hopefully make this code more performance-portable across various
micro-architectures.
This commit is contained in:
Geza Lore 2020-04-25 19:37:59 +01:00
parent 70549e1a64
commit b79ef672e1
7 changed files with 205 additions and 192 deletions

View File

@ -208,21 +208,31 @@ void VerilatedFst::declDouble(vluint32_t code, const char* name, int dtypenum, f
declSymbol(code, name, dtypenum, vardir, vartype, array, arraynum, 2, 64);
}
// Note: emit* are only ever called from one place (full* in
// verilated_trace_imp.cpp, which is included in this file at the top),
// so always inline them.
// Emit a value change for a single-bit signal. Any non-zero newval is
// written as the string "1", zero as "0"; the signal is selected by
// m_symbolp[code].
VL_ATTR_ALWINLINE
void VerilatedFst::emitBit(vluint32_t code, vluint32_t newval) {
fstWriterEmitValueChange(m_fst, m_symbolp[code], newval ? "1" : "0");
}
// Emit a value change for a bus carried in one 32-bit word.
// Patch view: the first two lines are the templated form, where the width is
// the compile-time constant T_Bits; the VL_ATTR_ALWINLINE definition after
// them takes the width as the run-time argument 'bits'. Both forward the
// width and the value unchanged to fstWriterEmitValueChange32.
template <int T_Bits> void VerilatedFst::emitBus(vluint32_t code, vluint32_t newval) {
fstWriterEmitValueChange32(m_fst, m_symbolp[code], T_Bits, newval);
VL_ATTR_ALWINLINE
void VerilatedFst::emitBus(vluint32_t code, vluint32_t newval, int bits) {
fstWriterEmitValueChange32(m_fst, m_symbolp[code], bits, newval);
}
// Emit a value change for a signal carried in one 64-bit word. The width
// 'bits' and the value are forwarded unchanged to fstWriterEmitValueChange64.
VL_ATTR_ALWINLINE
void VerilatedFst::emitQuad(vluint32_t code, vluint64_t newval, int bits) {
fstWriterEmitValueChange64(m_fst, m_symbolp[code], bits, newval);
}
// Emit a value change for a signal stored as an array of 32-bit words.
// newvalp and the total width 'bits' are forwarded unchanged to
// fstWriterEmitValueChangeVec32.
VL_ATTR_ALWINLINE
void VerilatedFst::emitArray(vluint32_t code, const vluint32_t* newvalp, int bits) {
fstWriterEmitValueChangeVec32(m_fst, m_symbolp[code], bits, newvalp);
}
// Emit a value change for a float signal by handing the writer the address
// of the by-value parameter 'newval'.
// NOTE(review): only sizeof(float) bytes are valid behind this pointer;
// confirm the FST writer does not read a double-sized value for this symbol.
VL_ATTR_ALWINLINE
void VerilatedFst::emitFloat(vluint32_t code, float newval) {
fstWriterEmitValueChange(m_fst, m_symbolp[code], &newval);
}
// Emit a value change for a double signal by handing the writer the address
// of the by-value parameter 'newval'.
VL_ATTR_ALWINLINE
void VerilatedFst::emitDouble(vluint32_t code, double newval) {
fstWriterEmitValueChange(m_fst, m_symbolp[code], &newval);
}

View File

@ -67,13 +67,14 @@ protected:
bool preFullDump() VL_OVERRIDE { return isOpen(); }
bool preChangeDump() VL_OVERRIDE { return isOpen(); }
// Implementations of duck-typed methods for VerilatedTrace
void emitBit(vluint32_t code, vluint32_t newval);
template <int T_Bits> void emitBus(vluint32_t code, vluint32_t newval);
void emitQuad(vluint32_t code, vluint64_t newval, int bits);
void emitArray(vluint32_t code, const vluint32_t* newvalp, int bits);
void emitFloat(vluint32_t code, float newval);
void emitDouble(vluint32_t code, double newval);
// Implementations of duck-typed methods for VerilatedTrace. These are
// called from only one place (namely full*) so always inline them.
inline void emitBit(vluint32_t code, vluint32_t newval);
inline void emitBus(vluint32_t code, vluint32_t newval, int bits);
inline void emitQuad(vluint32_t code, vluint64_t newval, int bits);
inline void emitArray(vluint32_t code, const vluint32_t* newvalp, int bits);
inline void emitFloat(vluint32_t code, float newval);
inline void emitDouble(vluint32_t code, double newval);
public:
//=========================================================================

View File

@ -262,7 +262,7 @@ public:
// this is very hot code during tracing.
// duck-typed void emitBit(vluint32_t code, vluint32_t newval) = 0;
// duck-typed template <int T_Bits> void emitBus(vluint32_t code, vluint32_t newval) = 0;
// duck-typed void emitBus(vluint32_t code, vluint32_t newval, int bits) = 0;
// duck-typed void emitQuad(vluint32_t code, vluint64_t newval, int bits) = 0;
// duck-typed void emitArray(vluint32_t code, const vluint32_t* newvalp, int bits) = 0;
// duck-typed void emitFloat(vluint32_t code, float newval) = 0;
@ -272,7 +272,7 @@ public:
// Write to previous value buffer value and emit trace entry.
void fullBit(vluint32_t* oldp, vluint32_t newval);
template <int T_Bits> void fullBus(vluint32_t* oldp, vluint32_t newval);
void fullBus(vluint32_t* oldp, vluint32_t newval, int bits);
void fullQuad(vluint32_t* oldp, vluint64_t newval, int bits);
void fullArray(vluint32_t* oldp, const vluint32_t* newvalp, int bits);
void fullFloat(vluint32_t* oldp, float newval);
@ -286,8 +286,8 @@ public:
m_traceBufferWritep += 2;
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
}
template <int T_Bits> inline void chgBus(vluint32_t* oldp, vluint32_t newval) {
m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_BUS | T_Bits;
inline void chgBus(vluint32_t* oldp, vluint32_t newval, int bits) {
m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_BUS | bits;
m_traceBufferWritep[1].oldp = oldp;
m_traceBufferWritep[2].newBits = newval;
m_traceBufferWritep += 3;
@ -339,9 +339,9 @@ public:
const vluint32_t diff = *oldp ^ newval;
if (VL_UNLIKELY(diff)) fullBit(oldp, newval);
}
// Change check for a bus held in one 32-bit word: XOR the stored previous
// value at *oldp with newval and call fullBus only when they differ.
// Patch view: the templated signature and the fullBus<T_Bits> call belong to
// the compile-time-width form; the signature with 'bits' and the
// fullBus(oldp, newval, bits) call belong to the run-time-width form.
template <int T_Bits> inline void CHG(Bus)(vluint32_t* oldp, vluint32_t newval) {
inline void CHG(Bus)(vluint32_t* oldp, vluint32_t newval, int bits) {
// Non-zero exactly when at least one bit differs
const vluint32_t diff = *oldp ^ newval;
if (VL_UNLIKELY(diff)) fullBus<T_Bits>(oldp, newval);
if (VL_UNLIKELY(diff)) fullBus(oldp, newval, bits);
}
inline void CHG(Quad)(vluint32_t* oldp, vluint64_t newval, int bits) {
const vluint64_t diff = *reinterpret_cast<vluint64_t*>(oldp) ^ newval;

View File

@ -161,50 +161,15 @@ template <> void VerilatedTrace<VL_DERIVED_T>::workerThreadMain() {
continue;
case VerilatedTraceCommand::CHG_BUS:
VL_TRACE_THREAD_DEBUG("Command CHG_BUS");
oldp = (readp++)->oldp;
newBits = (readp++)->newBits;
// Bits stored in bottom byte of command
switch (cmd & 0xFFU) {
case 2: chgBusImpl<2>(oldp, newBits); continue;
case 3: chgBusImpl<3>(oldp, newBits); continue;
case 4: chgBusImpl<4>(oldp, newBits); continue;
case 5: chgBusImpl<5>(oldp, newBits); continue;
case 6: chgBusImpl<6>(oldp, newBits); continue;
case 7: chgBusImpl<7>(oldp, newBits); continue;
case 8: chgBusImpl<8>(oldp, newBits); continue;
case 9: chgBusImpl<9>(oldp, newBits); continue;
case 10: chgBusImpl<10>(oldp, newBits); continue;
case 11: chgBusImpl<11>(oldp, newBits); continue;
case 12: chgBusImpl<12>(oldp, newBits); continue;
case 13: chgBusImpl<13>(oldp, newBits); continue;
case 14: chgBusImpl<14>(oldp, newBits); continue;
case 15: chgBusImpl<15>(oldp, newBits); continue;
case 16: chgBusImpl<16>(oldp, newBits); continue;
case 17: chgBusImpl<17>(oldp, newBits); continue;
case 18: chgBusImpl<18>(oldp, newBits); continue;
case 19: chgBusImpl<19>(oldp, newBits); continue;
case 20: chgBusImpl<20>(oldp, newBits); continue;
case 21: chgBusImpl<21>(oldp, newBits); continue;
case 22: chgBusImpl<22>(oldp, newBits); continue;
case 23: chgBusImpl<23>(oldp, newBits); continue;
case 24: chgBusImpl<24>(oldp, newBits); continue;
case 25: chgBusImpl<25>(oldp, newBits); continue;
case 26: chgBusImpl<26>(oldp, newBits); continue;
case 27: chgBusImpl<27>(oldp, newBits); continue;
case 28: chgBusImpl<28>(oldp, newBits); continue;
case 29: chgBusImpl<29>(oldp, newBits); continue;
case 30: chgBusImpl<30>(oldp, newBits); continue;
case 31: chgBusImpl<31>(oldp, newBits); continue;
case 32: chgBusImpl<32>(oldp, newBits); continue;
}
VL_FATAL_MT(__FILE__, __LINE__, "", "Bad number of bits in CHG_BUS command");
break;
chgBusImpl(readp[0].oldp, readp[1].newBits, cmd & 0xFFULL);
readp += 2;
VL_TRACE_THREAD_DEBUG("Command CHG_BUS DONE");
continue;
case VerilatedTraceCommand::CHG_QUAD:
VL_TRACE_THREAD_DEBUG("Command CHG_QUAD");
// Bits stored in bottom byte of command
chgQuadImpl(readp[0].oldp, readp[1].newBits, cmd & 0xFF);
chgQuadImpl(readp[0].oldp, readp[1].newBits, cmd & 0xFFULL);
readp += 2;
continue;
case VerilatedTraceCommand::CHG_ARRAY:
@ -516,49 +481,12 @@ template <> void VerilatedTrace<VL_DERIVED_T>::fullBit(vluint32_t* oldp, vluint3
self()->emitBit(oldp - m_sigs_oldvalp, newval);
}
// We want these functions specialized for sizes to avoid hard to predict
// branches, but we don't want them inlined, so we explicitly instantiate the
// template for each size used by Verilator.
// Store newval into the previous-value slot at oldp, then have the derived
// class emit it. The code passed to emitBus is the slot's offset from
// m_sigs_oldvalp.
// Patch view: "template <int T_Bits>", the two-argument signature and the
// emitBus<T_Bits> call belong to the templated form; the signature with
// 'bits' and the emitBus(..., bits) call belong to the run-time-width form.
template <>
template <int T_Bits>
void VerilatedTrace<VL_DERIVED_T>::fullBus(vluint32_t* oldp, vluint32_t newval) {
void VerilatedTrace<VL_DERIVED_T>::fullBus(vluint32_t* oldp, vluint32_t newval, int bits) {
*oldp = newval;
self()->emitBus<T_Bits>(oldp - m_sigs_oldvalp, newval);
self()->emitBus(oldp - m_sigs_oldvalp, newval, bits);
}
// Note: No specialization for width 1, covered by 'fullBit'
template void VerilatedTrace<VL_DERIVED_T>::fullBus<2>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<3>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<4>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<5>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<6>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<7>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<8>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<9>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<10>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<11>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<12>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<13>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<14>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<15>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<16>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<17>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<18>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<19>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<20>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<21>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<22>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<23>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<24>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<25>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<26>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<27>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<28>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<29>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<30>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<31>(vluint32_t* oldp, vluint32_t newval);
template void VerilatedTrace<VL_DERIVED_T>::fullBus<32>(vluint32_t* oldp, vluint32_t newval);
template <>
void VerilatedTrace<VL_DERIVED_T>::fullQuad(vluint32_t* oldp, vluint64_t newval, int bits) {
*reinterpret_cast<vluint64_t*>(oldp) = newval;

View File

@ -611,15 +611,20 @@ void VerilatedVcd::declTriArray(vluint32_t code, const char* name, bool array, i
//=============================================================================
// Emit trace entries
#define VL_VCD_SUFFIXP(code) (m_suffixesp + (code)*VL_TRACE_SUFFIX_ENTRY_SIZE)
// Emit suffix, write back write pointer, check buffer
void VerilatedVcd::finishLine(vluint32_t code, char* writep) {
const char* const suffixp = m_suffixesp + code * VL_TRACE_SUFFIX_ENTRY_SIZE;
const char* const suffixp = VL_VCD_SUFFIXP(code);
// Copy the whole suffix (this avoid having hard to predict branches which
// helps a lot). Note suffixp could be aligned, so could load it in one go,
// but then we would be endiannes dependent which we don't have a way to
// test right now and probably would make little difference...
// Note: The maximum length of the suffix is
// helps a lot). Note: The maximum length of the suffix is
// VL_TRACE_MAX_VCD_CODE_SIZE + 2 == 7, but we unroll this here for speed.
#ifdef __x86_64__
// Copy the whole 8 bytes in one go, this works on little-endian machines
// supporting unaligned stores.
*reinterpret_cast<vluint64_t*>(writep) = *reinterpret_cast<const vluint64_t*>(suffixp);
#else
// Portable variant
writep[0] = suffixp[0];
writep[1] = suffixp[1];
writep[2] = suffixp[2];
@ -627,139 +632,202 @@ void VerilatedVcd::finishLine(vluint32_t code, char* writep) {
writep[4] = suffixp[4];
writep[5] = suffixp[5];
writep[6] = '\n'; // The 6th index is always '\n' if it's relevant, no need to fetch it.
#endif
// Now write back the write pointer incremented by the actual size of the
// suffix, which was stored in the last byte of the suffix buffer entry.
m_writep = writep + suffixp[VL_TRACE_SUFFIX_ENTRY_SIZE - 1];
bufferCheck();
}
// Note: emit* are only ever called from one place (full* in
// verilated_trace_imp.cpp, which is included in this file at the top),
// so always inline them.
// Emit one VCD line for a single-bit signal: one value character written at
// m_writep, then finishLine() is called with the advanced pointer and 'code'.
VL_ATTR_ALWINLINE
void VerilatedVcd::emitBit(vluint32_t code, vluint32_t newval) {
// Don't prefetch the suffix here: finishLine() is called right after the
// single store below, so a prefetch would come too late to help.
char* wp = m_writep;
// '0' | newval gives '0' or '1' -- assumes newval is 0 or 1 (TODO confirm callers)
*wp++ = '0' | static_cast<char>(newval);
finishLine(code, wp);
}
// Emit one VCD line for a multi-bit bus held in a 32-bit word: the character
// 'b', the value as '0'/'1' digits with the most significant digit first,
// then finishLine() for 'code'.
// Patch view: this listing interleaves two variants. The templated signature,
// "newval <<= 32 - T_Bits" and the do/while loop form the T_Bits variant; the
// VL_ATTR_ALWINLINE signature, "wp += bits" and the fall-through switch form
// the variant that receives the width in 'bits'.
template <int T_Bits> void VerilatedVcd::emitBus(vluint32_t code, vluint32_t newval) {
VL_ATTR_ALWINLINE
void VerilatedVcd::emitBus(vluint32_t code, vluint32_t newval, int bits) {
// Request the suffix entry early; it is needed only after the digits are written
VL_PREFETCH_RD(VL_VCD_SUFFIXP(code));
char* wp = m_writep;
*wp++ = 'b';
// T_Bits variant: left-align the value, then peel off bit 31 once per digit
newval <<= 32 - T_Bits;
int bits = T_Bits;
do {
*wp++ = '0' | static_cast<char>(newval >> 31);
newval <<= 1;
} while (--bits);
// 'bits' variant: step past all digits first and fill them in through
// negative offsets, so no store depends on a previous one
wp += bits;
// clang-format off
// Entry is at the label equal to the width; execution then falls through down
// to the least significant bit. Labels exist for widths 2..32 only.
switch (bits) {
case 32: wp[-32] = '0' | static_cast<char>((newval >> 31) ); //FALLTHRU
case 31: wp[-31] = '0' | static_cast<char>((newval >> 30) & 1); //FALLTHRU
case 30: wp[-30] = '0' | static_cast<char>((newval >> 29) & 1); //FALLTHRU
case 29: wp[-29] = '0' | static_cast<char>((newval >> 28) & 1); //FALLTHRU
case 28: wp[-28] = '0' | static_cast<char>((newval >> 27) & 1); //FALLTHRU
case 27: wp[-27] = '0' | static_cast<char>((newval >> 26) & 1); //FALLTHRU
case 26: wp[-26] = '0' | static_cast<char>((newval >> 25) & 1); //FALLTHRU
case 25: wp[-25] = '0' | static_cast<char>((newval >> 24) & 1); //FALLTHRU
case 24: wp[-24] = '0' | static_cast<char>((newval >> 23) & 1); //FALLTHRU
case 23: wp[-23] = '0' | static_cast<char>((newval >> 22) & 1); //FALLTHRU
case 22: wp[-22] = '0' | static_cast<char>((newval >> 21) & 1); //FALLTHRU
case 21: wp[-21] = '0' | static_cast<char>((newval >> 20) & 1); //FALLTHRU
case 20: wp[-20] = '0' | static_cast<char>((newval >> 19) & 1); //FALLTHRU
case 19: wp[-19] = '0' | static_cast<char>((newval >> 18) & 1); //FALLTHRU
case 18: wp[-18] = '0' | static_cast<char>((newval >> 17) & 1); //FALLTHRU
case 17: wp[-17] = '0' | static_cast<char>((newval >> 16) & 1); //FALLTHRU
case 16: wp[-16] = '0' | static_cast<char>((newval >> 15) & 1); //FALLTHRU
case 15: wp[-15] = '0' | static_cast<char>((newval >> 14) & 1); //FALLTHRU
case 14: wp[-14] = '0' | static_cast<char>((newval >> 13) & 1); //FALLTHRU
case 13: wp[-13] = '0' | static_cast<char>((newval >> 12) & 1); //FALLTHRU
case 12: wp[-12] = '0' | static_cast<char>((newval >> 11) & 1); //FALLTHRU
case 11: wp[-11] = '0' | static_cast<char>((newval >> 10) & 1); //FALLTHRU
case 10: wp[-10] = '0' | static_cast<char>((newval >> 9) & 1); //FALLTHRU
case 9: wp[ -9] = '0' | static_cast<char>((newval >> 8) & 1); //FALLTHRU
case 8: wp[ -8] = '0' | static_cast<char>((newval >> 7) & 1); //FALLTHRU
case 7: wp[ -7] = '0' | static_cast<char>((newval >> 6) & 1); //FALLTHRU
case 6: wp[ -6] = '0' | static_cast<char>((newval >> 5) & 1); //FALLTHRU
case 5: wp[ -5] = '0' | static_cast<char>((newval >> 4) & 1); //FALLTHRU
case 4: wp[ -4] = '0' | static_cast<char>((newval >> 3) & 1); //FALLTHRU
case 3: wp[ -3] = '0' | static_cast<char>((newval >> 2) & 1); //FALLTHRU
case 2: wp[ -2] = '0' | static_cast<char>((newval >> 1) & 1); //FALLTHRU
// No label on the next line: it is reached only by falling through from case 2
/*bit*/ wp[ -1] = '0' | static_cast<char>((newval ) & 1); //FALLTHRU
}
// clang-format on
finishLine(code, wp);
}
// Emit one VCD line for a value held in a 64-bit word: the character 'b', the
// digits of the upper part (bits - 32 of them, selected by the switch), the
// 32 digits of the lower word, then finishLine() for 'code'. Digits are
// written most significant first.
// Patch view: this listing interleaves two variants of the body. One
// left-aligns the value ("newval <<= 64 - bits") and repeatedly peels off
// bit 63 (the case lines ending in "newval<<=1" and the 'remaining' loop).
// The other leaves newval untouched, extracts each upper bit with a fixed
// shift, and writes the low word eight digits per loop iteration.
VL_ATTR_ALWINLINE
void VerilatedVcd::emitQuad(vluint32_t code, vluint64_t newval, int bits) {
// Request the suffix entry early; it is needed only after the digits are written
VL_PREFETCH_RD(VL_VCD_SUFFIXP(code));
char* wp = m_writep;
*wp++ = 'b';
newval <<= 64 - bits;
// Handle the top 32 bits within the 64 bit input
const int bitsInTopHalf = bits - 32;
// Step past the upper digits first; the switch fills them in through
// negative offsets. Labels exist for 1..32 upper bits.
wp += bitsInTopHalf;
// clang-format off
switch (bitsInTopHalf) {
case 32: wp[-32] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 31: wp[-31] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 30: wp[-30] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 29: wp[-29] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 28: wp[-28] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 27: wp[-27] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 26: wp[-26] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 25: wp[-25] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 24: wp[-24] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 23: wp[-23] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 22: wp[-22] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 21: wp[-21] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 20: wp[-20] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 19: wp[-19] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 18: wp[-18] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 17: wp[-17] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 16: wp[-16] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 15: wp[-15] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 14: wp[-14] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 13: wp[-13] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 12: wp[-12] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 11: wp[-11] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 10: wp[-10] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 9: wp[ -9] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 8: wp[ -8] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 7: wp[ -7] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 6: wp[ -6] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 5: wp[ -5] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 4: wp[ -4] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 3: wp[ -3] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 2: wp[ -2] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 1: wp[ -1] = '0' | static_cast<char>(newval >> 63); newval<<=1; //FALLTHRU
case 32: wp[-32] = '0' | static_cast<char>((newval >> 63) ); //FALLTHRU
case 31: wp[-31] = '0' | static_cast<char>((newval >> 62) & 1); //FALLTHRU
case 30: wp[-30] = '0' | static_cast<char>((newval >> 61) & 1); //FALLTHRU
case 29: wp[-29] = '0' | static_cast<char>((newval >> 60) & 1); //FALLTHRU
case 28: wp[-28] = '0' | static_cast<char>((newval >> 59) & 1); //FALLTHRU
case 27: wp[-27] = '0' | static_cast<char>((newval >> 58) & 1); //FALLTHRU
case 26: wp[-26] = '0' | static_cast<char>((newval >> 57) & 1); //FALLTHRU
case 25: wp[-25] = '0' | static_cast<char>((newval >> 56) & 1); //FALLTHRU
case 24: wp[-24] = '0' | static_cast<char>((newval >> 55) & 1); //FALLTHRU
case 23: wp[-23] = '0' | static_cast<char>((newval >> 54) & 1); //FALLTHRU
case 22: wp[-22] = '0' | static_cast<char>((newval >> 53) & 1); //FALLTHRU
case 21: wp[-21] = '0' | static_cast<char>((newval >> 52) & 1); //FALLTHRU
case 20: wp[-20] = '0' | static_cast<char>((newval >> 51) & 1); //FALLTHRU
case 19: wp[-19] = '0' | static_cast<char>((newval >> 50) & 1); //FALLTHRU
case 18: wp[-18] = '0' | static_cast<char>((newval >> 49) & 1); //FALLTHRU
case 17: wp[-17] = '0' | static_cast<char>((newval >> 48) & 1); //FALLTHRU
case 16: wp[-16] = '0' | static_cast<char>((newval >> 47) & 1); //FALLTHRU
case 15: wp[-15] = '0' | static_cast<char>((newval >> 46) & 1); //FALLTHRU
case 14: wp[-14] = '0' | static_cast<char>((newval >> 45) & 1); //FALLTHRU
case 13: wp[-13] = '0' | static_cast<char>((newval >> 44) & 1); //FALLTHRU
case 12: wp[-12] = '0' | static_cast<char>((newval >> 43) & 1); //FALLTHRU
case 11: wp[-11] = '0' | static_cast<char>((newval >> 42) & 1); //FALLTHRU
case 10: wp[-10] = '0' | static_cast<char>((newval >> 41) & 1); //FALLTHRU
case 9: wp[ -9] = '0' | static_cast<char>((newval >> 40) & 1); //FALLTHRU
case 8: wp[ -8] = '0' | static_cast<char>((newval >> 39) & 1); //FALLTHRU
case 7: wp[ -7] = '0' | static_cast<char>((newval >> 38) & 1); //FALLTHRU
case 6: wp[ -6] = '0' | static_cast<char>((newval >> 37) & 1); //FALLTHRU
case 5: wp[ -5] = '0' | static_cast<char>((newval >> 36) & 1); //FALLTHRU
case 4: wp[ -4] = '0' | static_cast<char>((newval >> 35) & 1); //FALLTHRU
case 3: wp[ -3] = '0' | static_cast<char>((newval >> 34) & 1); //FALLTHRU
case 2: wp[ -2] = '0' | static_cast<char>((newval >> 33) & 1); //FALLTHRU
case 1: wp[ -1] = '0' | static_cast<char>((newval >> 32) & 1); //FALLTHRU
}
// clang-format on
// Handle the bottom 32 bits within the 64 bit input
int remaining = 32;
vluint32_t val = static_cast<vluint32_t>(newval); // Truncate to bottom 32 bits
// Four passes of eight digits each cover the low word; after each pass val is
// shifted up by 8 so the next byte's bits sit at positions 31..24
int loops = 4;
do {
*wp++ = '0' | static_cast<char>(newval >> 63);
newval <<= 1;
} while (--remaining);
wp[0] = '0' | static_cast<char>((val >> 31));
wp[1] = '0' | static_cast<char>((val >> 30) & 1);
wp[2] = '0' | static_cast<char>((val >> 29) & 1);
wp[3] = '0' | static_cast<char>((val >> 28) & 1);
wp[4] = '0' | static_cast<char>((val >> 27) & 1);
wp[5] = '0' | static_cast<char>((val >> 26) & 1);
wp[6] = '0' | static_cast<char>((val >> 25) & 1);
wp[7] = '0' | static_cast<char>((val >> 24) & 1);
wp += 8;
val <<= 8;
} while (--loops);
finishLine(code, wp);
}
// Emit one VCD line for a value stored as an array of 32-bit words: the
// character 'b', the digits of the highest-indexed word (only its
// bitsInMSW low bits), then every remaining word from high index to low with
// 32 digits each, then finishLine() for 'code'. Digits are written most
// significant first.
// Patch view: this listing interleaves two variants of the body. One
// left-aligns each word and repeatedly peels off bit 31 (the "val" load with
// "<< (32 - bitsInMSW)", the case lines ending in "val<<=1", and the inner
// "--bits" loop). The other extracts each bit with a fixed shift and writes
// the full words eight digits per loop iteration.
VL_ATTR_ALWINLINE
void VerilatedVcd::emitArray(vluint32_t code, const vluint32_t* newvalp, int bits) {
// Request the suffix entry early; it is needed only after the digits are written
VL_PREFETCH_RD(VL_VCD_SUFFIXP(code));
// Number of 32-bit words needed to hold 'bits' bits, rounded up
int words = (bits + 31) / 32;
char* wp = m_writep;
*wp++ = 'b';
// Handle the most significant word
vluint32_t val = newvalp[--words];
// A width that is a multiple of 32 fills the top word completely
const int bitsInMSW = bits % 32 == 0 ? 32 : bits % 32;
vluint32_t val = newvalp[--words] << (32 - bitsInMSW);
// Step past the top word's digits first; the switch fills them in through
// negative offsets
wp += bitsInMSW;
// clang-format off
switch (bitsInMSW) {
case 32: wp[-32] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 31: wp[-31] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 30: wp[-30] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 29: wp[-29] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 28: wp[-28] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 27: wp[-27] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 26: wp[-26] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 25: wp[-25] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 24: wp[-24] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 23: wp[-23] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 22: wp[-22] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 21: wp[-21] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 20: wp[-20] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 19: wp[-19] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 18: wp[-18] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 17: wp[-17] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 16: wp[-16] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 15: wp[-15] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 14: wp[-14] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 13: wp[-13] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 12: wp[-12] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 11: wp[-11] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 10: wp[-10] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 9: wp[ -9] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 8: wp[ -8] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 7: wp[ -7] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 6: wp[ -6] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 5: wp[ -5] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 4: wp[ -4] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 3: wp[ -3] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 2: wp[ -2] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 1: wp[ -1] = '0' | static_cast<char>(val >> 31); val<<=1; //FALLTHRU
case 32: wp[-32] = '0' | static_cast<char>((val >> 31) ); //FALLTHRU
case 31: wp[-31] = '0' | static_cast<char>((val >> 30) & 1); //FALLTHRU
case 30: wp[-30] = '0' | static_cast<char>((val >> 29) & 1); //FALLTHRU
case 29: wp[-29] = '0' | static_cast<char>((val >> 28) & 1); //FALLTHRU
case 28: wp[-28] = '0' | static_cast<char>((val >> 27) & 1); //FALLTHRU
case 27: wp[-27] = '0' | static_cast<char>((val >> 26) & 1); //FALLTHRU
case 26: wp[-26] = '0' | static_cast<char>((val >> 25) & 1); //FALLTHRU
case 25: wp[-25] = '0' | static_cast<char>((val >> 24) & 1); //FALLTHRU
case 24: wp[-24] = '0' | static_cast<char>((val >> 23) & 1); //FALLTHRU
case 23: wp[-23] = '0' | static_cast<char>((val >> 22) & 1); //FALLTHRU
case 22: wp[-22] = '0' | static_cast<char>((val >> 21) & 1); //FALLTHRU
case 21: wp[-21] = '0' | static_cast<char>((val >> 20) & 1); //FALLTHRU
case 20: wp[-20] = '0' | static_cast<char>((val >> 19) & 1); //FALLTHRU
case 19: wp[-19] = '0' | static_cast<char>((val >> 18) & 1); //FALLTHRU
case 18: wp[-18] = '0' | static_cast<char>((val >> 17) & 1); //FALLTHRU
case 17: wp[-17] = '0' | static_cast<char>((val >> 16) & 1); //FALLTHRU
case 16: wp[-16] = '0' | static_cast<char>((val >> 15) & 1); //FALLTHRU
case 15: wp[-15] = '0' | static_cast<char>((val >> 14) & 1); //FALLTHRU
case 14: wp[-14] = '0' | static_cast<char>((val >> 13) & 1); //FALLTHRU
case 13: wp[-13] = '0' | static_cast<char>((val >> 12) & 1); //FALLTHRU
case 12: wp[-12] = '0' | static_cast<char>((val >> 11) & 1); //FALLTHRU
case 11: wp[-11] = '0' | static_cast<char>((val >> 10) & 1); //FALLTHRU
case 10: wp[-10] = '0' | static_cast<char>((val >> 9) & 1); //FALLTHRU
case 9: wp[ -9] = '0' | static_cast<char>((val >> 8) & 1); //FALLTHRU
case 8: wp[ -8] = '0' | static_cast<char>((val >> 7) & 1); //FALLTHRU
case 7: wp[ -7] = '0' | static_cast<char>((val >> 6) & 1); //FALLTHRU
case 6: wp[ -6] = '0' | static_cast<char>((val >> 5) & 1); //FALLTHRU
case 5: wp[ -5] = '0' | static_cast<char>((val >> 4) & 1); //FALLTHRU
case 4: wp[ -4] = '0' | static_cast<char>((val >> 3) & 1); //FALLTHRU
case 3: wp[ -3] = '0' | static_cast<char>((val >> 2) & 1); //FALLTHRU
case 2: wp[ -2] = '0' | static_cast<char>((val >> 1) & 1); //FALLTHRU
case 1: wp[ -1] = '0' | static_cast<char>((val ) & 1); //FALLTHRU
}
// clang-format on
// Handle the remaining words
while (words > 0) {
vluint32_t val = newvalp[--words];
int bits = 32;
// Four passes of eight digits each cover one word; after each pass val is
// shifted up by 8 so the next byte's bits sit at positions 31..24
int loops = 4;
do {
*wp++ = '0' | static_cast<char>(val >> 31);
val <<= 1;
} while (--bits);
wp[0] = '0' | static_cast<char>((val >> 31));
wp[1] = '0' | static_cast<char>((val >> 30) & 1);
wp[2] = '0' | static_cast<char>((val >> 29) & 1);
wp[3] = '0' | static_cast<char>((val >> 28) & 1);
wp[4] = '0' | static_cast<char>((val >> 27) & 1);
wp[5] = '0' | static_cast<char>((val >> 26) & 1);
wp[6] = '0' | static_cast<char>((val >> 25) & 1);
wp[7] = '0' | static_cast<char>((val >> 24) & 1);
wp += 8;
val <<= 8;
} while (--loops);
}
finishLine(code, wp);
}
VL_ATTR_ALWINLINE
void VerilatedVcd::emitFloat(vluint32_t code, float newval) {
VL_PREFETCH_RD(VL_VCD_SUFFIXP(code));
char* wp = m_writep;
// Buffer can't overflow before sprintf; we sized during declaration
sprintf(wp, "r%.16g", static_cast<double>(newval));
@ -767,7 +835,9 @@ void VerilatedVcd::emitFloat(vluint32_t code, float newval) {
finishLine(code, wp);
}
VL_ATTR_ALWINLINE
void VerilatedVcd::emitDouble(vluint32_t code, double newval) {
VL_PREFETCH_RD(VL_VCD_SUFFIXP(code));
char* wp = m_writep;
// Buffer can't overflow before sprintf; we sized during declaration
sprintf(wp, "r%.16g", newval);
@ -775,6 +845,8 @@ void VerilatedVcd::emitDouble(vluint32_t code, double newval) {
finishLine(code, wp);
}
#undef VL_VCD_SUFFIXP
#ifdef VL_TRACE_VCD_OLD_API
void VerilatedVcd::fullBit(vluint32_t code, const vluint32_t newval) {

View File

@ -122,13 +122,14 @@ protected:
bool preFullDump() VL_OVERRIDE { return isOpen(); }
bool preChangeDump() VL_OVERRIDE;
// Implementations of duck-typed methods for VerilatedTrace
void emitBit(vluint32_t code, vluint32_t newval);
template <int T_Bits> void emitBus(vluint32_t code, vluint32_t newval);
void emitQuad(vluint32_t code, vluint64_t newval, int bits);
void emitArray(vluint32_t code, const vluint32_t* newvalp, int bits);
void emitFloat(vluint32_t code, float newval);
void emitDouble(vluint32_t code, double newval);
// Implementations of duck-typed methods for VerilatedTrace. These are
// called from only one place (namely full*) so always inline them.
inline void emitBit(vluint32_t code, vluint32_t newval);
inline void emitBus(vluint32_t code, vluint32_t newval, int bits);
inline void emitQuad(vluint32_t code, vluint64_t newval, int bits);
inline void emitArray(vluint32_t code, const vluint32_t* newvalp, int bits);
inline void emitFloat(vluint32_t code, float newval);
inline void emitDouble(vluint32_t code, double newval);
public:
//=========================================================================
@ -179,8 +180,8 @@ public:
// Write back to previous value buffer value and emit
// Pointer-based adapter: turns oldp into its offset from this->oldp(0) and
// forwards to the fullBit overload that takes that offset as first argument.
void fullBit(vluint32_t* oldp, vluint32_t newval) { fullBit(oldp - this->oldp(0), newval); }
// Pointer-based adapter for buses: turns oldp into its offset from
// this->oldp(0) and forwards, together with the width, to the fullBus
// overload that takes that offset as first argument.
// Patch view: the templated signature and the call passing T_Bits belong to
// the compile-time-width form; the signature with 'bits' and the call passing
// 'bits' belong to the run-time-width form.
template <int T_Bits> void fullBus(vluint32_t* oldp, vluint32_t newval) {
fullBus(oldp - this->oldp(0), newval, T_Bits);
void fullBus(vluint32_t* oldp, vluint32_t newval, int bits) {
fullBus(oldp - this->oldp(0), newval, bits);
}
void fullQuad(vluint32_t* oldp, vluint64_t newval, int bits) {
fullQuad(oldp - this->oldp(0), newval, bits);
@ -195,8 +196,8 @@ public:
// Check previous value and emit if changed
// Pointer-based adapter: turns oldp into its offset from this->oldp(0) and
// forwards to the chgBit overload that takes that offset as first argument.
void chgBit(vluint32_t* oldp, vluint32_t newval) { chgBit(oldp - this->oldp(0), newval); }
// Pointer-based adapter for buses: turns oldp into its offset from
// this->oldp(0) and forwards, together with the width, to the chgBus
// overload that takes that offset as first argument.
// Patch view: the templated signature and the call passing T_Bits belong to
// the compile-time-width form; the signature with 'bits' and the call passing
// 'bits' belong to the run-time-width form.
template <int T_Bits> void chgBus(vluint32_t* oldp, vluint32_t newval) {
chgBus(oldp - this->oldp(0), newval, T_Bits);
void chgBus(vluint32_t* oldp, vluint32_t newval, int bits) {
chgBus(oldp - this->oldp(0), newval, bits);
}
void chgQuad(vluint32_t* oldp, vluint64_t newval, int bits) {
chgQuad(oldp - this->oldp(0), newval, bits);

View File

@ -3561,7 +3561,8 @@ class EmitCTrace : EmitCStmts {
puts("vcdp->" + full + "Quad");
emitWidth = true;
} else if (nodep->declp()->widthMin() > 1) {
puts("vcdp->" + full + "Bus<" + cvtToStr(nodep->declp()->widthMin()) + ">");
puts("vcdp->" + full + "Bus");
emitWidth = true;
} else {
puts("vcdp->" + full + "Bit");
}