diff --git a/include/verilated_fst_c.cpp b/include/verilated_fst_c.cpp index 1e1d44434..c4b1dc077 100644 --- a/include/verilated_fst_c.cpp +++ b/include/verilated_fst_c.cpp @@ -208,21 +208,31 @@ void VerilatedFst::declDouble(vluint32_t code, const char* name, int dtypenum, f declSymbol(code, name, dtypenum, vardir, vartype, array, arraynum, 2, 64); } +// Note: emit* are only ever called from one place (full* in +// verilated_trace_imp.cpp, which is included in this file at the top), +// so always inline them. + +VL_ATTR_ALWINLINE void VerilatedFst::emitBit(vluint32_t code, vluint32_t newval) { fstWriterEmitValueChange(m_fst, m_symbolp[code], newval ? "1" : "0"); } -template void VerilatedFst::emitBus(vluint32_t code, vluint32_t newval) { - fstWriterEmitValueChange32(m_fst, m_symbolp[code], T_Bits, newval); +VL_ATTR_ALWINLINE +void VerilatedFst::emitBus(vluint32_t code, vluint32_t newval, int bits) { + fstWriterEmitValueChange32(m_fst, m_symbolp[code], bits, newval); } +VL_ATTR_ALWINLINE void VerilatedFst::emitQuad(vluint32_t code, vluint64_t newval, int bits) { fstWriterEmitValueChange64(m_fst, m_symbolp[code], bits, newval); } +VL_ATTR_ALWINLINE void VerilatedFst::emitArray(vluint32_t code, const vluint32_t* newvalp, int bits) { fstWriterEmitValueChangeVec32(m_fst, m_symbolp[code], bits, newvalp); } +VL_ATTR_ALWINLINE void VerilatedFst::emitFloat(vluint32_t code, float newval) { fstWriterEmitValueChange(m_fst, m_symbolp[code], &newval); } +VL_ATTR_ALWINLINE void VerilatedFst::emitDouble(vluint32_t code, double newval) { fstWriterEmitValueChange(m_fst, m_symbolp[code], &newval); } diff --git a/include/verilated_fst_c.h b/include/verilated_fst_c.h index 8572a0f5f..b80d8ea7a 100644 --- a/include/verilated_fst_c.h +++ b/include/verilated_fst_c.h @@ -67,13 +67,14 @@ protected: bool preFullDump() VL_OVERRIDE { return isOpen(); } bool preChangeDump() VL_OVERRIDE { return isOpen(); } - // Implementations of duck-typed methods for VerilatedTrace - void emitBit(vluint32_t code, vluint32_t newval); - template void emitBus(vluint32_t code, vluint32_t newval); - void emitQuad(vluint32_t code, vluint64_t newval, int bits); - void emitArray(vluint32_t code, const vluint32_t* newvalp, int bits); - void emitFloat(vluint32_t code, float newval); - void emitDouble(vluint32_t code, double newval); + // Implementations of duck-typed methods for VerilatedTrace. These are + // called from only one place (namely full*) so always inline them. + inline void emitBit(vluint32_t code, vluint32_t newval); + inline void emitBus(vluint32_t code, vluint32_t newval, int bits); + inline void emitQuad(vluint32_t code, vluint64_t newval, int bits); + inline void emitArray(vluint32_t code, const vluint32_t* newvalp, int bits); + inline void emitFloat(vluint32_t code, float newval); + inline void emitDouble(vluint32_t code, double newval); public: //========================================================================= diff --git a/include/verilated_trace.h b/include/verilated_trace.h index a611a436c..ee0bca3e2 100644 --- a/include/verilated_trace.h +++ b/include/verilated_trace.h @@ -262,7 +262,7 @@ public: // this is very hot code during tracing. // duck-typed void emitBit(vluint32_t code, vluint32_t newval) = 0; - // duck-typed template void emitBus(vluint32_t code, vluint32_t newval) = 0; + // duck-typed void emitBus(vluint32_t code, vluint32_t newval, int bits) = 0; // duck-typed void emitQuad(vluint32_t code, vluint64_t newval, int bits) = 0; // duck-typed void emitArray(vluint32_t code, const vluint32_t* newvalp, int bits) = 0; // duck-typed void emitFloat(vluint32_t code, float newval) = 0; @@ -272,7 +272,7 @@ public: // Write to previous value buffer value and emit trace entry. void fullBit(vluint32_t* oldp, vluint32_t newval); - template void fullBus(vluint32_t* oldp, vluint32_t newval); + void fullBus(vluint32_t* oldp, vluint32_t newval, int bits); void fullQuad(vluint32_t* oldp, vluint64_t newval, int bits); void fullArray(vluint32_t* oldp, const vluint32_t* newvalp, int bits); void fullFloat(vluint32_t* oldp, float newval); @@ -286,8 +286,8 @@ public: m_traceBufferWritep += 2; VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp);); } - template inline void chgBus(vluint32_t* oldp, vluint32_t newval) { - m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_BUS | T_Bits; + inline void chgBus(vluint32_t* oldp, vluint32_t newval, int bits) { + m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_BUS | bits; m_traceBufferWritep[1].oldp = oldp; m_traceBufferWritep[2].newBits = newval; m_traceBufferWritep += 3; @@ -339,9 +339,9 @@ public: const vluint32_t diff = *oldp ^ newval; if (VL_UNLIKELY(diff)) fullBit(oldp, newval); } - template inline void CHG(Bus)(vluint32_t* oldp, vluint32_t newval) { + inline void CHG(Bus)(vluint32_t* oldp, vluint32_t newval, int bits) { const vluint32_t diff = *oldp ^ newval; - if (VL_UNLIKELY(diff)) fullBus(oldp, newval); + if (VL_UNLIKELY(diff)) fullBus(oldp, newval, bits); } inline void CHG(Quad)(vluint32_t* oldp, vluint64_t newval, int bits) { const vluint64_t diff = *reinterpret_cast(oldp) ^ newval; diff --git a/include/verilated_trace_imp.cpp b/include/verilated_trace_imp.cpp index b5b6fc924..3dc3c33d4 100644 --- a/include/verilated_trace_imp.cpp +++ b/include/verilated_trace_imp.cpp @@ -161,50 +161,15 @@ template <> void VerilatedTrace::workerThreadMain() { continue; case VerilatedTraceCommand::CHG_BUS: VL_TRACE_THREAD_DEBUG("Command CHG_BUS"); - - oldp = (readp++)->oldp; - newBits = (readp++)->newBits; - // Bits stored in bottom byte of command - switch (cmd & 0xFFU) { - case 2: chgBusImpl<2>(oldp, newBits); continue; - case 3: chgBusImpl<3>(oldp, newBits); continue; - case 4: chgBusImpl<4>(oldp, newBits); continue; - case 5: chgBusImpl<5>(oldp, newBits); continue; - case 6: chgBusImpl<6>(oldp, newBits); continue; - case 7: chgBusImpl<7>(oldp, newBits); continue; - case 8: chgBusImpl<8>(oldp, newBits); continue; - case 9: chgBusImpl<9>(oldp, newBits); continue; - case 10: chgBusImpl<10>(oldp, newBits); continue; - case 11: chgBusImpl<11>(oldp, newBits); continue; - case 12: chgBusImpl<12>(oldp, newBits); continue; - case 13: chgBusImpl<13>(oldp, newBits); continue; - case 14: chgBusImpl<14>(oldp, newBits); continue; - case 15: chgBusImpl<15>(oldp, newBits); continue; - case 16: chgBusImpl<16>(oldp, newBits); continue; - case 17: chgBusImpl<17>(oldp, newBits); continue; - case 18: chgBusImpl<18>(oldp, newBits); continue; - case 19: chgBusImpl<19>(oldp, newBits); continue; - case 20: chgBusImpl<20>(oldp, newBits); continue; - case 21: chgBusImpl<21>(oldp, newBits); continue; - case 22: chgBusImpl<22>(oldp, newBits); continue; - case 23: chgBusImpl<23>(oldp, newBits); continue; - case 24: chgBusImpl<24>(oldp, newBits); continue; - case 25: chgBusImpl<25>(oldp, newBits); continue; - case 26: chgBusImpl<26>(oldp, newBits); continue; - case 27: chgBusImpl<27>(oldp, newBits); continue; - case 28: chgBusImpl<28>(oldp, newBits); continue; - case 29: chgBusImpl<29>(oldp, newBits); continue; - case 30: chgBusImpl<30>(oldp, newBits); continue; - case 31: chgBusImpl<31>(oldp, newBits); continue; - case 32: chgBusImpl<32>(oldp, newBits); continue; - } - VL_FATAL_MT(__FILE__, __LINE__, "", "Bad number of bits in CHG_BUS command"); - break; + chgBusImpl(readp[0].oldp, readp[1].newBits, cmd & 0xFFULL); + readp += 2; + VL_TRACE_THREAD_DEBUG("Command CHG_BUS DONE"); + continue; case VerilatedTraceCommand::CHG_QUAD: VL_TRACE_THREAD_DEBUG("Command CHG_QUAD"); // Bits stored in bottom byte of command - chgQuadImpl(readp[0].oldp, readp[1].newBits, cmd & 0xFF); + chgQuadImpl(readp[0].oldp, readp[1].newBits, cmd & 0xFFULL); readp += 2; continue; case VerilatedTraceCommand::CHG_ARRAY: @@ -516,49 +481,12 @@ template <> void VerilatedTrace::fullBit(vluint32_t* oldp, vluint3 self()->emitBit(oldp - m_sigs_oldvalp, newval); } -// We want these functions specialized for sizes to avoid hard to predict -// branches, but we don't want them inlined, so we explicitly instantiate the -// template for each size used by Verilator. template <> -template -void VerilatedTrace::fullBus(vluint32_t* oldp, vluint32_t newval) { +void VerilatedTrace::fullBus(vluint32_t* oldp, vluint32_t newval, int bits) { *oldp = newval; - self()->emitBus(oldp - m_sigs_oldvalp, newval); + self()->emitBus(oldp - m_sigs_oldvalp, newval, bits); } -// Note: No specialization for width 1, covered by 'fullBit' -template void VerilatedTrace::fullBus<2>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<3>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<4>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<5>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<6>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<7>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<8>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<9>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<10>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<11>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<12>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<13>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<14>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<15>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<16>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<17>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<18>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<19>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<20>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<21>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<22>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<23>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<24>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<25>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<26>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<27>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<28>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<29>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<30>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<31>(vluint32_t* oldp, vluint32_t newval); -template void VerilatedTrace::fullBus<32>(vluint32_t* oldp, vluint32_t newval); - template <> void VerilatedTrace::fullQuad(vluint32_t* oldp, vluint64_t newval, int bits) { *reinterpret_cast(oldp) = newval; diff --git a/include/verilated_vcd_c.cpp b/include/verilated_vcd_c.cpp index f52c80882..e1ffd2293 100644 --- a/include/verilated_vcd_c.cpp +++ b/include/verilated_vcd_c.cpp @@ -611,15 +611,20 @@ void VerilatedVcd::declTriArray(vluint32_t code, const char* name, bool array, i //============================================================================= // Emit trace entries +#define VL_VCD_SUFFIXP(code) (m_suffixesp + (code)*VL_TRACE_SUFFIX_ENTRY_SIZE) + // Emit suffix, write back write pointer, check buffer void VerilatedVcd::finishLine(vluint32_t code, char* writep) { - const char* const suffixp = m_suffixesp + code * VL_TRACE_SUFFIX_ENTRY_SIZE; + const char* const suffixp = VL_VCD_SUFFIXP(code); // Copy the whole suffix (this avoid having hard to predict branches which - // helps a lot). Note suffixp could be aligned, so could load it in one go, - // but then we would be endiannes dependent which we don't have a way to - // test right now and probably would make little difference... - // Note: The maximum length of the suffix is + // helps a lot). Note: The maximum length of the suffix is // VL_TRACE_MAX_VCD_CODE_SIZE + 2 == 7, but we unroll this here for speed. +#ifdef __x86_64__ + // Copy the whole 8 bytes in one go, this works on little-endian machines + // supporting unaligned stores. + *reinterpret_cast(writep) = *reinterpret_cast(suffixp); +#else + // Portable variant writep[0] = suffixp[0]; writep[1] = suffixp[1]; writep[2] = suffixp[2]; @@ -627,139 +632,202 @@ void VerilatedVcd::finishLine(vluint32_t code, char* writep) { writep[4] = suffixp[4]; writep[5] = suffixp[5]; writep[6] = '\n'; // The 6th index is always '\n' if it's relevant, no need to fetch it. +#endif // Now write back the write pointer incremented by the actual size of the // suffix, which was stored in the last byte of the suffix buffer entry. m_writep = writep + suffixp[VL_TRACE_SUFFIX_ENTRY_SIZE - 1]; bufferCheck(); } +// Note: emit* are only ever called from one place (full* in +// verilated_trace_imp.cpp, which is included in this file at the top), +// so always inline them. + +VL_ATTR_ALWINLINE void VerilatedVcd::emitBit(vluint32_t code, vluint32_t newval) { + // Don't prefetch suffix as it's a bit too late; char* wp = m_writep; *wp++ = '0' | static_cast(newval); finishLine(code, wp); } -template void VerilatedVcd::emitBus(vluint32_t code, vluint32_t newval) { +VL_ATTR_ALWINLINE +void VerilatedVcd::emitBus(vluint32_t code, vluint32_t newval, int bits) { + VL_PREFETCH_RD(VL_VCD_SUFFIXP(code)); char* wp = m_writep; *wp++ = 'b'; - newval <<= 32 - T_Bits; - int bits = T_Bits; - do { - *wp++ = '0' | static_cast(newval >> 31); - newval <<= 1; - } while (--bits); + wp += bits; + // clang-format off + switch (bits) { + case 32: wp[-32] = '0' | static_cast((newval >> 31) ); //FALLTHRU + case 31: wp[-31] = '0' | static_cast((newval >> 30) & 1); //FALLTHRU + case 30: wp[-30] = '0' | static_cast((newval >> 29) & 1); //FALLTHRU + case 29: wp[-29] = '0' | static_cast((newval >> 28) & 1); //FALLTHRU + case 28: wp[-28] = '0' | static_cast((newval >> 27) & 1); //FALLTHRU + case 27: wp[-27] = '0' | static_cast((newval >> 26) & 1); //FALLTHRU + case 26: wp[-26] = '0' | static_cast((newval >> 25) & 1); //FALLTHRU + case 25: wp[-25] = '0' | static_cast((newval >> 24) & 1); //FALLTHRU + case 24: wp[-24] = '0' | static_cast((newval >> 23) & 1); //FALLTHRU + case 23: wp[-23] = '0' | static_cast((newval >> 22) & 1); //FALLTHRU + case 22: wp[-22] = '0' | static_cast((newval >> 21) & 1); //FALLTHRU + case 21: wp[-21] = '0' | static_cast((newval >> 20) & 1); //FALLTHRU + case 20: wp[-20] = '0' | static_cast((newval >> 19) & 1); //FALLTHRU + case 19: wp[-19] = '0' | static_cast((newval >> 18) & 1); //FALLTHRU + case 18: wp[-18] = '0' | static_cast((newval >> 17) & 1); //FALLTHRU + case 17: wp[-17] = '0' | static_cast((newval >> 16) & 1); //FALLTHRU + case 16: wp[-16] = '0' | static_cast((newval >> 15) & 1); //FALLTHRU + case 15: wp[-15] = '0' | static_cast((newval >> 14) & 1); //FALLTHRU + case 14: wp[-14] = '0' | static_cast((newval >> 13) & 1); //FALLTHRU + case 13: wp[-13] = '0' | static_cast((newval >> 12) & 1); //FALLTHRU + case 12: wp[-12] = '0' | static_cast((newval >> 11) & 1); //FALLTHRU + case 11: wp[-11] = '0' | static_cast((newval >> 10) & 1); //FALLTHRU + case 10: wp[-10] = '0' | static_cast((newval >> 9) & 1); //FALLTHRU + case 9: wp[ -9] = '0' | static_cast((newval >> 8) & 1); //FALLTHRU + case 8: wp[ -8] = '0' | static_cast((newval >> 7) & 1); //FALLTHRU + case 7: wp[ -7] = '0' | static_cast((newval >> 6) & 1); //FALLTHRU + case 6: wp[ -6] = '0' | static_cast((newval >> 5) & 1); //FALLTHRU + case 5: wp[ -5] = '0' | static_cast((newval >> 4) & 1); //FALLTHRU + case 4: wp[ -4] = '0' | static_cast((newval >> 3) & 1); //FALLTHRU + case 3: wp[ -3] = '0' | static_cast((newval >> 2) & 1); //FALLTHRU + case 2: wp[ -2] = '0' | static_cast((newval >> 1) & 1); //FALLTHRU + /*bit*/ wp[ -1] = '0' | static_cast((newval ) & 1); //FALLTHRU + } + // clang-format on finishLine(code, wp); } +VL_ATTR_ALWINLINE void VerilatedVcd::emitQuad(vluint32_t code, vluint64_t newval, int bits) { + VL_PREFETCH_RD(VL_VCD_SUFFIXP(code)); char* wp = m_writep; *wp++ = 'b'; - newval <<= 64 - bits; // Handle the top 32 bits within the 64 bit input const int bitsInTopHalf = bits - 32; wp += bitsInTopHalf; // clang-format off switch (bitsInTopHalf) { - case 32: wp[-32] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 31: wp[-31] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 30: wp[-30] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 29: wp[-29] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 28: wp[-28] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 27: wp[-27] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 26: wp[-26] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 25: wp[-25] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 24: wp[-24] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 23: wp[-23] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 22: wp[-22] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 21: wp[-21] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 20: wp[-20] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 19: wp[-19] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 18: wp[-18] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 17: wp[-17] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 16: wp[-16] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 15: wp[-15] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 14: wp[-14] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 13: wp[-13] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 12: wp[-12] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 11: wp[-11] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 10: wp[-10] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 9: wp[ -9] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 8: wp[ -8] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 7: wp[ -7] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 6: wp[ -6] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 5: wp[ -5] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 4: wp[ -4] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 3: wp[ -3] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 2: wp[ -2] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU - case 1: wp[ -1] = '0' | static_cast(newval >> 63); newval<<=1; //FALLTHRU + case 32: wp[-32] = '0' | static_cast((newval >> 63) ); //FALLTHRU + case 31: wp[-31] = '0' | static_cast((newval >> 62) & 1); //FALLTHRU + case 30: wp[-30] = '0' | static_cast((newval >> 61) & 1); //FALLTHRU + case 29: wp[-29] = '0' | static_cast((newval >> 60) & 1); //FALLTHRU + case 28: wp[-28] = '0' | static_cast((newval >> 59) & 1); //FALLTHRU + case 27: wp[-27] = '0' | static_cast((newval >> 58) & 1); //FALLTHRU + case 26: wp[-26] = '0' | static_cast((newval >> 57) & 1); //FALLTHRU + case 25: wp[-25] = '0' | static_cast((newval >> 56) & 1); //FALLTHRU + case 24: wp[-24] = '0' | static_cast((newval >> 55) & 1); //FALLTHRU + case 23: wp[-23] = '0' | static_cast((newval >> 54) & 1); //FALLTHRU + case 22: wp[-22] = '0' | static_cast((newval >> 53) & 1); //FALLTHRU + case 21: wp[-21] = '0' | static_cast((newval >> 52) & 1); //FALLTHRU + case 20: wp[-20] = '0' | static_cast((newval >> 51) & 1); //FALLTHRU + case 19: wp[-19] = '0' | static_cast((newval >> 50) & 1); //FALLTHRU + case 18: wp[-18] = '0' | static_cast((newval >> 49) & 1); //FALLTHRU + case 17: wp[-17] = '0' | static_cast((newval >> 48) & 1); //FALLTHRU + case 16: wp[-16] = '0' | static_cast((newval >> 47) & 1); //FALLTHRU + case 15: wp[-15] = '0' | static_cast((newval >> 46) & 1); //FALLTHRU + case 14: wp[-14] = '0' | static_cast((newval >> 45) & 1); //FALLTHRU + case 13: wp[-13] = '0' | static_cast((newval >> 44) & 1); //FALLTHRU + case 12: wp[-12] = '0' | static_cast((newval >> 43) & 1); //FALLTHRU + case 11: wp[-11] = '0' | static_cast((newval >> 42) & 1); //FALLTHRU + case 10: wp[-10] = '0' | static_cast((newval >> 41) & 1); //FALLTHRU + case 9: wp[ -9] = '0' | static_cast((newval >> 40) & 1); //FALLTHRU + case 8: wp[ -8] = '0' | static_cast((newval >> 39) & 1); //FALLTHRU + case 7: wp[ -7] = '0' | static_cast((newval >> 38) & 1); //FALLTHRU + case 6: wp[ -6] = '0' | static_cast((newval >> 37) & 1); //FALLTHRU + case 5: wp[ -5] = '0' | static_cast((newval >> 36) & 1); //FALLTHRU + case 4: wp[ -4] = '0' | static_cast((newval >> 35) & 1); //FALLTHRU + case 3: wp[ -3] = '0' | static_cast((newval >> 34) & 1); //FALLTHRU + case 2: wp[ -2] = '0' | static_cast((newval >> 33) & 1); //FALLTHRU + case 1: wp[ -1] = '0' | static_cast((newval >> 32) & 1); //FALLTHRU } // clang-format on // Handle the bottom 32 bits within the 64 bit input - int remaining = 32; + vluint32_t val = static_cast(newval); // Truncate to bottom 32 bits + int loops = 4; do { - *wp++ = '0' | static_cast(newval >> 63); - newval <<= 1; - } while (--remaining); + wp[0] = '0' | static_cast((val >> 31)); + wp[1] = '0' | static_cast((val >> 30) & 1); + wp[2] = '0' | static_cast((val >> 29) & 1); + wp[3] = '0' | static_cast((val >> 28) & 1); + wp[4] = '0' | static_cast((val >> 27) & 1); + wp[5] = '0' | static_cast((val >> 26) & 1); + wp[6] = '0' | static_cast((val >> 25) & 1); + wp[7] = '0' | static_cast((val >> 24) & 1); + wp += 8; + val <<= 8; + } while (--loops); + finishLine(code, wp); } +VL_ATTR_ALWINLINE void VerilatedVcd::emitArray(vluint32_t code, const vluint32_t* newvalp, int bits) { + VL_PREFETCH_RD(VL_VCD_SUFFIXP(code)); int words = (bits + 31) / 32; char* wp = m_writep; *wp++ = 'b'; // Handle the most significant word + vluint32_t val = newvalp[--words]; const int bitsInMSW = bits % 32 == 0 ? 32 : bits % 32; - vluint32_t val = newvalp[--words] << (32 - bitsInMSW); wp += bitsInMSW; // clang-format off switch (bitsInMSW) { - case 32: wp[-32] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 31: wp[-31] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 30: wp[-30] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 29: wp[-29] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 28: wp[-28] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 27: wp[-27] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 26: wp[-26] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 25: wp[-25] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 24: wp[-24] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 23: wp[-23] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 22: wp[-22] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 21: wp[-21] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 20: wp[-20] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 19: wp[-19] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 18: wp[-18] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 17: wp[-17] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 16: wp[-16] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 15: wp[-15] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 14: wp[-14] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 13: wp[-13] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 12: wp[-12] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 11: wp[-11] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 10: wp[-10] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 9: wp[ -9] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 8: wp[ -8] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 7: wp[ -7] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 6: wp[ -6] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 5: wp[ -5] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 4: wp[ -4] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 3: wp[ -3] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 2: wp[ -2] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU - case 1: wp[ -1] = '0' | static_cast(val >> 31); val<<=1; //FALLTHRU + case 32: wp[-32] = '0' | static_cast((val >> 31) ); //FALLTHRU + case 31: wp[-31] = '0' | static_cast((val >> 30) & 1); //FALLTHRU + case 30: wp[-30] = '0' | static_cast((val >> 29) & 1); //FALLTHRU + case 29: wp[-29] = '0' | static_cast((val >> 28) & 1); //FALLTHRU + case 28: wp[-28] = '0' | static_cast((val >> 27) & 1); //FALLTHRU + case 27: wp[-27] = '0' | static_cast((val >> 26) & 1); //FALLTHRU + case 26: wp[-26] = '0' | static_cast((val >> 25) & 1); //FALLTHRU + case 25: wp[-25] = '0' | static_cast((val >> 24) & 1); //FALLTHRU + case 24: wp[-24] = '0' | static_cast((val >> 23) & 1); //FALLTHRU + case 23: wp[-23] = '0' | static_cast((val >> 22) & 1); //FALLTHRU + case 22: wp[-22] = '0' | static_cast((val >> 21) & 1); //FALLTHRU + case 21: wp[-21] = '0' | static_cast((val >> 20) & 1); //FALLTHRU + case 20: wp[-20] = '0' | static_cast((val >> 19) & 1); //FALLTHRU + case 19: wp[-19] = '0' | static_cast((val >> 18) & 1); //FALLTHRU + case 18: wp[-18] = '0' | static_cast((val >> 17) & 1); //FALLTHRU + case 17: wp[-17] = '0' | static_cast((val >> 16) & 1); //FALLTHRU + case 16: wp[-16] = '0' | static_cast((val >> 15) & 1); //FALLTHRU + case 15: wp[-15] = '0' | static_cast((val >> 14) & 1); //FALLTHRU + case 14: wp[-14] = '0' | static_cast((val >> 13) & 1); //FALLTHRU + case 13: wp[-13] = '0' | static_cast((val >> 12) & 1); //FALLTHRU + case 12: wp[-12] = '0' | static_cast((val >> 11) & 1); //FALLTHRU + case 11: wp[-11] = '0' | static_cast((val >> 10) & 1); //FALLTHRU + case 10: wp[-10] = '0' | static_cast((val >> 9) & 1); //FALLTHRU + case 9: wp[ -9] = '0' | static_cast((val >> 8) & 1); //FALLTHRU + case 8: wp[ -8] = '0' | static_cast((val >> 7) & 1); //FALLTHRU + case 7: wp[ -7] = '0' | static_cast((val >> 6) & 1); //FALLTHRU + case 6: wp[ -6] = '0' | static_cast((val >> 5) & 1); //FALLTHRU + case 5: wp[ -5] = '0' | static_cast((val >> 4) & 1); //FALLTHRU + case 4: wp[ -4] = '0' | static_cast((val >> 3) & 1); //FALLTHRU + case 3: wp[ -3] = '0' | static_cast((val >> 2) & 1); //FALLTHRU + case 2: wp[ -2] = '0' | static_cast((val >> 1) & 1); //FALLTHRU + case 1: wp[ -1] = '0' | static_cast((val ) & 1); //FALLTHRU } // clang-format on // Handle the remaining words while (words > 0) { vluint32_t val = newvalp[--words]; - int bits = 32; + int loops = 4; do { - *wp++ = '0' | static_cast(val >> 31); - val <<= 1; - } while (--bits); + wp[0] = '0' | static_cast((val >> 31)); + wp[1] = '0' | static_cast((val >> 30) & 1); + wp[2] = '0' | static_cast((val >> 29) & 1); + wp[3] = '0' | static_cast((val >> 28) & 1); + wp[4] = '0' | static_cast((val >> 27) & 1); + wp[5] = '0' | static_cast((val >> 26) & 1); + wp[6] = '0' | static_cast((val >> 25) & 1); + wp[7] = '0' | static_cast((val >> 24) & 1); + wp += 8; + val <<= 8; + } while (--loops); } finishLine(code, wp); } +VL_ATTR_ALWINLINE void VerilatedVcd::emitFloat(vluint32_t code, float newval) { + VL_PREFETCH_RD(VL_VCD_SUFFIXP(code)); char* wp = m_writep; // Buffer can't overflow before sprintf; we sized during declaration sprintf(wp, "r%.16g", static_cast(newval)); @@ -767,7 +835,9 @@ void VerilatedVcd::emitFloat(vluint32_t code, float newval) { finishLine(code, wp); } +VL_ATTR_ALWINLINE void VerilatedVcd::emitDouble(vluint32_t code, double newval) { + VL_PREFETCH_RD(VL_VCD_SUFFIXP(code)); char* wp = m_writep; // Buffer can't overflow before sprintf; we sized during declaration sprintf(wp, "r%.16g", newval); @@ -775,6 +845,8 @@ void VerilatedVcd::emitDouble(vluint32_t code, double newval) { finishLine(code, wp); } +#undef VL_VCD_SUFFIXP + #ifdef VL_TRACE_VCD_OLD_API void VerilatedVcd::fullBit(vluint32_t code, const vluint32_t newval) { diff --git a/include/verilated_vcd_c.h b/include/verilated_vcd_c.h index e9838c15b..d9049366f 100644 --- a/include/verilated_vcd_c.h +++ b/include/verilated_vcd_c.h @@ -122,13 +122,14 @@ protected: bool preFullDump() VL_OVERRIDE { return isOpen(); } bool preChangeDump() VL_OVERRIDE; - // Implementations of duck-typed methods for VerilatedTrace - void emitBit(vluint32_t code, vluint32_t newval); - template void emitBus(vluint32_t code, vluint32_t newval); - void emitQuad(vluint32_t code, vluint64_t newval, int bits); - void emitArray(vluint32_t code, const vluint32_t* newvalp, int bits); - void emitFloat(vluint32_t code, float newval); - void emitDouble(vluint32_t code, double newval); + // Implementations of duck-typed methods for VerilatedTrace. These are + // called from only one place (namely full*) so always inline them. + inline void emitBit(vluint32_t code, vluint32_t newval); + inline void emitBus(vluint32_t code, vluint32_t newval, int bits); + inline void emitQuad(vluint32_t code, vluint64_t newval, int bits); + inline void emitArray(vluint32_t code, const vluint32_t* newvalp, int bits); + inline void emitFloat(vluint32_t code, float newval); + inline void emitDouble(vluint32_t code, double newval); public: //========================================================================= @@ -179,8 +180,8 @@ public: // Write back to previous value buffer value and emit void fullBit(vluint32_t* oldp, vluint32_t newval) { fullBit(oldp - this->oldp(0), newval); } - template void fullBus(vluint32_t* oldp, vluint32_t newval) { - fullBus(oldp - this->oldp(0), newval, T_Bits); + void fullBus(vluint32_t* oldp, vluint32_t newval, int bits) { + fullBus(oldp - this->oldp(0), newval, bits); } void fullQuad(vluint32_t* oldp, vluint64_t newval, int bits) { fullQuad(oldp - this->oldp(0), newval, bits); @@ -195,8 +196,8 @@ public: // Check previous value and emit if changed void chgBit(vluint32_t* oldp, vluint32_t newval) { chgBit(oldp - this->oldp(0), newval); } - template void chgBus(vluint32_t* oldp, vluint32_t newval) { - chgBus(oldp - this->oldp(0), newval, T_Bits); + void chgBus(vluint32_t* oldp, vluint32_t newval, int bits) { + chgBus(oldp - this->oldp(0), newval, bits); } void chgQuad(vluint32_t* oldp, vluint64_t newval, int bits) { chgQuad(oldp - this->oldp(0), newval, bits); diff --git a/src/V3EmitC.cpp b/src/V3EmitC.cpp index 28c3ace73..ee4ae3e3d 100644 --- a/src/V3EmitC.cpp +++ b/src/V3EmitC.cpp @@ -3561,7 +3561,8 @@ class EmitCTrace : EmitCStmts { puts("vcdp->" + full + "Quad"); emitWidth = true; } else if (nodep->declp()->widthMin() > 1) { - puts("vcdp->" + full + "Bus<" + cvtToStr(nodep->declp()->widthMin()) + ">"); + puts("vcdp->" + full + "Bus"); + emitWidth = true; } else { puts("vcdp->" + full + "Bit"); }