forked from github/verilator
VCD tracing speed improvements (#2246)
* Don't inline VCD dump functions.
  Improves model speed with tracing. Measured on SweRV cmark:
  - GCC 5.5: ~3% faster
  - Clang 6.0: ~12% faster (!)
* Remove redundant test from VCD bit tracing.
  Improves model speed with tracing. Measured on SweRV cmark:
  - GCC 5.5: ~7.5% faster
  - Clang 6.0: ~1.5% faster
This commit is contained in:
parent
0f617988d4
commit
05f213c266
2
Changes
2
Changes
@ -19,6 +19,8 @@ The contributors that suggested a given feature are shown in []. Thanks!
|
|||||||
|
|
||||||
**** Fix build of fast path tracing code to use OPT_FAST, #2245. [Geza Lore]
|
**** Fix build of fast path tracing code to use OPT_FAST, #2245. [Geza Lore]
|
||||||
|
|
||||||
|
**** Improve VCD dump performance, #2246. [Geza Lore]
|
||||||
|
|
||||||
|
|
||||||
* Verilator 4.032 2020-04-04
|
* Verilator 4.032 2020-04-04
|
||||||
|
|
||||||
|
@ -179,7 +179,8 @@ public:
|
|||||||
void fullTriBit(vluint32_t code, const vluint32_t newval, const vluint32_t newtri);
|
void fullTriBit(vluint32_t code, const vluint32_t newval, const vluint32_t newtri);
|
||||||
void fullTriBus(vluint32_t code, const vluint32_t newval, const vluint32_t newtri, int bits);
|
void fullTriBus(vluint32_t code, const vluint32_t newval, const vluint32_t newtri, int bits);
|
||||||
void fullTriQuad(vluint32_t code, const vluint64_t newval, const vluint32_t newtri, int bits);
|
void fullTriQuad(vluint32_t code, const vluint64_t newval, const vluint32_t newtri, int bits);
|
||||||
void fullTriArray(vluint32_t code, const vluint32_t* newvalp, const vluint32_t* newtrip, int bits);
|
void fullTriArray(vluint32_t code, const vluint32_t* newvalp, const vluint32_t* newtrip,
|
||||||
|
int bits);
|
||||||
void fullBitX(vluint32_t code);
|
void fullBitX(vluint32_t code);
|
||||||
void fullBusX(vluint32_t code, int bits);
|
void fullBusX(vluint32_t code, int bits);
|
||||||
void fullQuadX(vluint32_t code, int bits);
|
void fullQuadX(vluint32_t code, int bits);
|
||||||
|
@ -657,6 +657,112 @@ void VerilatedVcd::declDouble(vluint32_t code, const char* name, bool array, int
|
|||||||
|
|
||||||
//=============================================================================
|
//=============================================================================
|
||||||
|
|
||||||
|
void VerilatedVcd::fullBit(vluint32_t code, const vluint32_t newval) {
|
||||||
|
// Note the &1, so we don't require clean input -- makes more common no change case faster
|
||||||
|
m_sigs_oldvalp[code] = newval;
|
||||||
|
*m_writep++ = ('0' + static_cast<char>(newval & 1));
|
||||||
|
printCode(code);
|
||||||
|
*m_writep++ = '\n';
|
||||||
|
bufferCheck();
|
||||||
|
}
|
||||||
|
void VerilatedVcd::fullBus(vluint32_t code, const vluint32_t newval, int bits) {
|
||||||
|
m_sigs_oldvalp[code] = newval;
|
||||||
|
*m_writep++ = 'b';
|
||||||
|
for (int bit = bits - 1; bit >= 0; --bit) {
|
||||||
|
*m_writep++ = ((newval & (1L << bit)) ? '1' : '0');
|
||||||
|
}
|
||||||
|
*m_writep++ = ' ';
|
||||||
|
printCode(code);
|
||||||
|
*m_writep++ = '\n';
|
||||||
|
bufferCheck();
|
||||||
|
}
|
||||||
|
void VerilatedVcd::fullQuad(vluint32_t code, const vluint64_t newval, int bits) {
|
||||||
|
(*(reinterpret_cast<vluint64_t*>(&m_sigs_oldvalp[code]))) = newval;
|
||||||
|
*m_writep++ = 'b';
|
||||||
|
for (int bit = bits - 1; bit >= 0; --bit) {
|
||||||
|
*m_writep++ = ((newval & (VL_ULL(1) << bit)) ? '1' : '0');
|
||||||
|
}
|
||||||
|
*m_writep++ = ' ';
|
||||||
|
printCode(code);
|
||||||
|
*m_writep++ = '\n';
|
||||||
|
bufferCheck();
|
||||||
|
}
|
||||||
|
void VerilatedVcd::fullArray(vluint32_t code, const vluint32_t* newval, int bits) {
|
||||||
|
for (int word = 0; word < (((bits - 1) / 32) + 1); ++word) {
|
||||||
|
m_sigs_oldvalp[code + word] = newval[word];
|
||||||
|
}
|
||||||
|
*m_writep++ = 'b';
|
||||||
|
for (int bit = bits - 1; bit >= 0; --bit) {
|
||||||
|
*m_writep++ = ((newval[(bit / 32)] & (1L << (bit & 0x1f))) ? '1' : '0');
|
||||||
|
}
|
||||||
|
*m_writep++ = ' ';
|
||||||
|
printCode(code);
|
||||||
|
*m_writep++ = '\n';
|
||||||
|
bufferCheck();
|
||||||
|
}
|
||||||
|
void VerilatedVcd::fullArray(vluint32_t code, const vluint64_t* newval, int bits) {
|
||||||
|
for (int word = 0; word < (((bits - 1) / 64) + 1); ++word) {
|
||||||
|
m_sigs_oldvalp[code + word] = newval[word];
|
||||||
|
}
|
||||||
|
*m_writep++ = 'b';
|
||||||
|
for (int bit = bits - 1; bit >= 0; --bit) {
|
||||||
|
*m_writep++ = ((newval[(bit / 64)] & (VL_ULL(1) << (bit & 0x3f))) ? '1' : '0');
|
||||||
|
}
|
||||||
|
*m_writep++ = ' ';
|
||||||
|
printCode(code);
|
||||||
|
*m_writep++ = '\n';
|
||||||
|
bufferCheck();
|
||||||
|
}
|
||||||
|
void VerilatedVcd::fullTriBit(vluint32_t code, const vluint32_t newval, const vluint32_t newtri) {
|
||||||
|
m_sigs_oldvalp[code] = newval;
|
||||||
|
m_sigs_oldvalp[code + 1] = newtri;
|
||||||
|
*m_writep++ = "01zz"[m_sigs_oldvalp[code] | (m_sigs_oldvalp[code + 1] << 1)];
|
||||||
|
printCode(code);
|
||||||
|
*m_writep++ = '\n';
|
||||||
|
bufferCheck();
|
||||||
|
}
|
||||||
|
void VerilatedVcd::fullTriBus(vluint32_t code, const vluint32_t newval, const vluint32_t newtri, int bits) {
|
||||||
|
m_sigs_oldvalp[code] = newval;
|
||||||
|
m_sigs_oldvalp[code + 1] = newtri;
|
||||||
|
*m_writep++ = 'b';
|
||||||
|
for (int bit = bits - 1; bit >= 0; --bit) {
|
||||||
|
*m_writep++ = "01zz"[((newval >> bit) & 1) | (((newtri >> bit) & 1) << 1)];
|
||||||
|
}
|
||||||
|
*m_writep++ = ' ';
|
||||||
|
printCode(code);
|
||||||
|
*m_writep++ = '\n';
|
||||||
|
bufferCheck();
|
||||||
|
}
|
||||||
|
void VerilatedVcd::fullTriQuad(vluint32_t code, const vluint64_t newval, const vluint32_t newtri, int bits) {
|
||||||
|
(*(reinterpret_cast<vluint64_t*>(&m_sigs_oldvalp[code]))) = newval;
|
||||||
|
(*(reinterpret_cast<vluint64_t*>(&m_sigs_oldvalp[code + 1]))) = newtri;
|
||||||
|
*m_writep++ = 'b';
|
||||||
|
for (int bit = bits - 1; bit >= 0; --bit) {
|
||||||
|
*m_writep++ = "01zz"[((newval >> bit) & VL_ULL(1))
|
||||||
|
| (((newtri >> bit) & VL_ULL(1)) << VL_ULL(1))];
|
||||||
|
}
|
||||||
|
*m_writep++ = ' ';
|
||||||
|
printCode(code);
|
||||||
|
*m_writep++ = '\n';
|
||||||
|
bufferCheck();
|
||||||
|
}
|
||||||
|
void VerilatedVcd::fullTriArray(vluint32_t code, const vluint32_t* newvalp, const vluint32_t* newtrip,
|
||||||
|
int bits) {
|
||||||
|
for (int word = 0; word < (((bits - 1) / 32) + 1); ++word) {
|
||||||
|
m_sigs_oldvalp[code + word * 2] = newvalp[word];
|
||||||
|
m_sigs_oldvalp[code + word * 2 + 1] = newtrip[word];
|
||||||
|
}
|
||||||
|
*m_writep++ = 'b';
|
||||||
|
for (int bit = bits - 1; bit >= 0; --bit) {
|
||||||
|
vluint32_t valbit = (newvalp[(bit / 32)] >> (bit & 0x1f)) & 1;
|
||||||
|
vluint32_t tribit = (newtrip[(bit / 32)] >> (bit & 0x1f)) & 1;
|
||||||
|
*m_writep++ = "01zz"[valbit | (tribit << 1)];
|
||||||
|
}
|
||||||
|
*m_writep++ = ' ';
|
||||||
|
printCode(code);
|
||||||
|
*m_writep++ = '\n';
|
||||||
|
bufferCheck();
|
||||||
|
}
|
||||||
void VerilatedVcd::fullDouble(vluint32_t code, const double newval) {
|
void VerilatedVcd::fullDouble(vluint32_t code, const double newval) {
|
||||||
// cppcheck-suppress invalidPointerCast
|
// cppcheck-suppress invalidPointerCast
|
||||||
(*(reinterpret_cast<double*>(&m_sigs_oldvalp[code]))) = newval;
|
(*(reinterpret_cast<double*>(&m_sigs_oldvalp[code]))) = newval;
|
||||||
@ -679,6 +785,24 @@ void VerilatedVcd::fullFloat(vluint32_t code, const float newval) {
|
|||||||
*m_writep++ = '\n';
|
*m_writep++ = '\n';
|
||||||
bufferCheck();
|
bufferCheck();
|
||||||
}
|
}
|
||||||
|
void VerilatedVcd::fullBitX(vluint32_t code) {
|
||||||
|
*m_writep++ = 'x';
|
||||||
|
printCode(code);
|
||||||
|
*m_writep++ = '\n';
|
||||||
|
bufferCheck();
|
||||||
|
}
|
||||||
|
void VerilatedVcd::fullBusX(vluint32_t code, int bits) {
|
||||||
|
*m_writep++ = 'b';
|
||||||
|
for (int bit = bits - 1; bit >= 0; --bit) {
|
||||||
|
*m_writep++ = 'x';
|
||||||
|
}
|
||||||
|
*m_writep++ = ' ';
|
||||||
|
printCode(code);
|
||||||
|
*m_writep++ = '\n';
|
||||||
|
bufferCheck();
|
||||||
|
}
|
||||||
|
void VerilatedVcd::fullQuadX(vluint32_t code, int bits) { fullBusX(code, bits); }
|
||||||
|
void VerilatedVcd::fullArrayX(vluint32_t code, int bits) { fullBusX(code, bits); }
|
||||||
|
|
||||||
//=============================================================================
|
//=============================================================================
|
||||||
// Callbacks
|
// Callbacks
|
||||||
|
@ -216,113 +216,17 @@ public:
|
|||||||
void declFloat(vluint32_t code, const char* name, bool array, int arraynum);
|
void declFloat(vluint32_t code, const char* name, bool array, int arraynum);
|
||||||
// ... other module_start for submodules (based on cell name)
|
// ... other module_start for submodules (based on cell name)
|
||||||
|
|
||||||
/// Inside dumping routines, dump one signal
|
/// Inside dumping routines, dump one signal, faster when not inlined
|
||||||
void fullBit(vluint32_t code, const vluint32_t newval) {
|
/// due to code size reduction.
|
||||||
// Note the &1, so we don't require clean input -- makes more common no change case faster
|
void fullBit(vluint32_t code, const vluint32_t newval);
|
||||||
m_sigs_oldvalp[code] = newval;
|
void fullBus(vluint32_t code, const vluint32_t newval, int bits);
|
||||||
*m_writep++ = ('0' + static_cast<char>(newval & 1));
|
void fullQuad(vluint32_t code, const vluint64_t newval, int bits);
|
||||||
printCode(code);
|
void fullArray(vluint32_t code, const vluint32_t* newval, int bits);
|
||||||
*m_writep++ = '\n';
|
void fullArray(vluint32_t code, const vluint64_t* newval, int bits);
|
||||||
bufferCheck();
|
void fullTriBit(vluint32_t code, const vluint32_t newval, const vluint32_t newtri);
|
||||||
}
|
void fullTriBus(vluint32_t code, const vluint32_t newval, const vluint32_t newtri, int bits);
|
||||||
void fullBus(vluint32_t code, const vluint32_t newval, int bits) {
|
void fullTriQuad(vluint32_t code, const vluint64_t newval, const vluint32_t newtri, int bits);
|
||||||
m_sigs_oldvalp[code] = newval;
|
void fullTriArray(vluint32_t code, const vluint32_t* newvalp, const vluint32_t* newtrip, int bits);
|
||||||
*m_writep++ = 'b';
|
|
||||||
for (int bit = bits - 1; bit >= 0; --bit) {
|
|
||||||
*m_writep++ = ((newval & (1L << bit)) ? '1' : '0');
|
|
||||||
}
|
|
||||||
*m_writep++ = ' ';
|
|
||||||
printCode(code);
|
|
||||||
*m_writep++ = '\n';
|
|
||||||
bufferCheck();
|
|
||||||
}
|
|
||||||
void fullQuad(vluint32_t code, const vluint64_t newval, int bits) {
|
|
||||||
(*(reinterpret_cast<vluint64_t*>(&m_sigs_oldvalp[code]))) = newval;
|
|
||||||
*m_writep++ = 'b';
|
|
||||||
for (int bit = bits - 1; bit >= 0; --bit) {
|
|
||||||
*m_writep++ = ((newval & (VL_ULL(1) << bit)) ? '1' : '0');
|
|
||||||
}
|
|
||||||
*m_writep++ = ' ';
|
|
||||||
printCode(code);
|
|
||||||
*m_writep++ = '\n';
|
|
||||||
bufferCheck();
|
|
||||||
}
|
|
||||||
void fullArray(vluint32_t code, const vluint32_t* newval, int bits) {
|
|
||||||
for (int word = 0; word < (((bits - 1) / 32) + 1); ++word) {
|
|
||||||
m_sigs_oldvalp[code + word] = newval[word];
|
|
||||||
}
|
|
||||||
*m_writep++ = 'b';
|
|
||||||
for (int bit = bits - 1; bit >= 0; --bit) {
|
|
||||||
*m_writep++ = ((newval[(bit / 32)] & (1L << (bit & 0x1f))) ? '1' : '0');
|
|
||||||
}
|
|
||||||
*m_writep++ = ' ';
|
|
||||||
printCode(code);
|
|
||||||
*m_writep++ = '\n';
|
|
||||||
bufferCheck();
|
|
||||||
}
|
|
||||||
void fullArray(vluint32_t code, const vluint64_t* newval, int bits) {
|
|
||||||
for (int word = 0; word < (((bits - 1) / 64) + 1); ++word) {
|
|
||||||
m_sigs_oldvalp[code + word] = newval[word];
|
|
||||||
}
|
|
||||||
*m_writep++ = 'b';
|
|
||||||
for (int bit = bits - 1; bit >= 0; --bit) {
|
|
||||||
*m_writep++ = ((newval[(bit / 64)] & (VL_ULL(1) << (bit & 0x3f))) ? '1' : '0');
|
|
||||||
}
|
|
||||||
*m_writep++ = ' ';
|
|
||||||
printCode(code);
|
|
||||||
*m_writep++ = '\n';
|
|
||||||
bufferCheck();
|
|
||||||
}
|
|
||||||
void fullTriBit(vluint32_t code, const vluint32_t newval, const vluint32_t newtri) {
|
|
||||||
m_sigs_oldvalp[code] = newval;
|
|
||||||
m_sigs_oldvalp[code + 1] = newtri;
|
|
||||||
*m_writep++ = "01zz"[m_sigs_oldvalp[code] | (m_sigs_oldvalp[code + 1] << 1)];
|
|
||||||
printCode(code);
|
|
||||||
*m_writep++ = '\n';
|
|
||||||
bufferCheck();
|
|
||||||
}
|
|
||||||
void fullTriBus(vluint32_t code, const vluint32_t newval, const vluint32_t newtri, int bits) {
|
|
||||||
m_sigs_oldvalp[code] = newval;
|
|
||||||
m_sigs_oldvalp[code + 1] = newtri;
|
|
||||||
*m_writep++ = 'b';
|
|
||||||
for (int bit = bits - 1; bit >= 0; --bit) {
|
|
||||||
*m_writep++ = "01zz"[((newval >> bit) & 1) | (((newtri >> bit) & 1) << 1)];
|
|
||||||
}
|
|
||||||
*m_writep++ = ' ';
|
|
||||||
printCode(code);
|
|
||||||
*m_writep++ = '\n';
|
|
||||||
bufferCheck();
|
|
||||||
}
|
|
||||||
void fullTriQuad(vluint32_t code, const vluint64_t newval, const vluint32_t newtri, int bits) {
|
|
||||||
(*(reinterpret_cast<vluint64_t*>(&m_sigs_oldvalp[code]))) = newval;
|
|
||||||
(*(reinterpret_cast<vluint64_t*>(&m_sigs_oldvalp[code + 1]))) = newtri;
|
|
||||||
*m_writep++ = 'b';
|
|
||||||
for (int bit = bits - 1; bit >= 0; --bit) {
|
|
||||||
*m_writep++ = "01zz"[((newval >> bit) & VL_ULL(1))
|
|
||||||
| (((newtri >> bit) & VL_ULL(1)) << VL_ULL(1))];
|
|
||||||
}
|
|
||||||
*m_writep++ = ' ';
|
|
||||||
printCode(code);
|
|
||||||
*m_writep++ = '\n';
|
|
||||||
bufferCheck();
|
|
||||||
}
|
|
||||||
void fullTriArray(vluint32_t code, const vluint32_t* newvalp, const vluint32_t* newtrip,
|
|
||||||
int bits) {
|
|
||||||
for (int word = 0; word < (((bits - 1) / 32) + 1); ++word) {
|
|
||||||
m_sigs_oldvalp[code + word * 2] = newvalp[word];
|
|
||||||
m_sigs_oldvalp[code + word * 2 + 1] = newtrip[word];
|
|
||||||
}
|
|
||||||
*m_writep++ = 'b';
|
|
||||||
for (int bit = bits - 1; bit >= 0; --bit) {
|
|
||||||
vluint32_t valbit = (newvalp[(bit / 32)] >> (bit & 0x1f)) & 1;
|
|
||||||
vluint32_t tribit = (newtrip[(bit / 32)] >> (bit & 0x1f)) & 1;
|
|
||||||
*m_writep++ = "01zz"[valbit | (tribit << 1)];
|
|
||||||
}
|
|
||||||
*m_writep++ = ' ';
|
|
||||||
printCode(code);
|
|
||||||
*m_writep++ = '\n';
|
|
||||||
bufferCheck();
|
|
||||||
}
|
|
||||||
void fullDouble(vluint32_t code, const double newval);
|
void fullDouble(vluint32_t code, const double newval);
|
||||||
void fullFloat(vluint32_t code, const float newval);
|
void fullFloat(vluint32_t code, const float newval);
|
||||||
|
|
||||||
@ -330,34 +234,17 @@ public:
|
|||||||
/// Presently this code doesn't change the oldval vector.
|
/// Presently this code doesn't change the oldval vector.
|
||||||
/// Thus this is for special standalone applications that after calling
|
/// Thus this is for special standalone applications that after calling
|
||||||
/// fullBitX, must call fullBit when the value then goes non-X.
|
/// fullBitX, must call fullBit when the value then goes non-X.
|
||||||
inline void fullBitX(vluint32_t code) {
|
void fullBitX(vluint32_t code);
|
||||||
*m_writep++ = 'x';
|
void fullBusX(vluint32_t code, int bits);
|
||||||
printCode(code);
|
void fullQuadX(vluint32_t code, int bits);
|
||||||
*m_writep++ = '\n';
|
void fullArrayX(vluint32_t code, int bits);
|
||||||
bufferCheck();
|
|
||||||
}
|
|
||||||
inline void fullBusX(vluint32_t code, int bits) {
|
|
||||||
*m_writep++ = 'b';
|
|
||||||
for (int bit = bits - 1; bit >= 0; --bit) {
|
|
||||||
*m_writep++ = 'x';
|
|
||||||
}
|
|
||||||
*m_writep++ = ' ';
|
|
||||||
printCode(code);
|
|
||||||
*m_writep++ = '\n';
|
|
||||||
bufferCheck();
|
|
||||||
}
|
|
||||||
inline void fullQuadX(vluint32_t code, int bits) { fullBusX(code, bits); }
|
|
||||||
inline void fullArrayX(vluint32_t code, int bits) { fullBusX(code, bits); }
|
|
||||||
|
|
||||||
/// Inside dumping routines, dump one signal if it has changed
|
/// Inside dumping routines, dump one signal if it has changed.
|
||||||
|
/// We do want to inline these to avoid calls when the value did not change.
|
||||||
inline void chgBit(vluint32_t code, const vluint32_t newval) {
|
inline void chgBit(vluint32_t code, const vluint32_t newval) {
|
||||||
vluint32_t diff = m_sigs_oldvalp[code] ^ newval;
|
vluint32_t diff = m_sigs_oldvalp[code] ^ newval;
|
||||||
if (VL_UNLIKELY(diff)) {
|
if (VL_UNLIKELY(diff)) {
|
||||||
// Verilator 3.510 and newer provide clean input, so the below
|
fullBit(code, newval);
|
||||||
// is only for back compatibility
|
|
||||||
if (VL_UNLIKELY(diff & 1)) { // Change after clean?
|
|
||||||
fullBit(code, newval);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
inline void chgBus(vluint32_t code, const vluint32_t newval, int bits) {
|
inline void chgBus(vluint32_t code, const vluint32_t newval, int bits) {
|
||||||
|
Loading…
Reference in New Issue
Block a user