forked from github/verilator
Add profile-guided optimization of mtasks (#3150).
This commit is contained in:
parent
def7c8fe4d
commit
9029da5ab8
1
Changes
1
Changes
@ -11,6 +11,7 @@ contributors that suggested a given feature are shown in []. Thanks!
|
|||||||
Verilator 4.213 devel
|
Verilator 4.213 devel
|
||||||
==========================
|
==========================
|
||||||
|
|
||||||
|
* Add profile-guided optimization of mtasks (#3150).
|
||||||
* Verilator_gantt has removed the ASCII graphics, use the VCD output instead.
|
* Verilator_gantt has removed the ASCII graphics, use the VCD output instead.
|
||||||
* Verilator_gantt now shows the predicted mtask times, eval times, and additional statistics.
|
* Verilator_gantt now shows the predicted mtask times, eval times, and additional statistics.
|
||||||
* Verilator_gantt data files now include processor information, to allow later processing.
|
* Verilator_gantt data files now include processor information, to allow later processing.
|
||||||
|
@ -447,6 +447,7 @@ description of these arguments.
|
|||||||
+verilator+prof+threads+file+<filename> Set profile filename
|
+verilator+prof+threads+file+<filename> Set profile filename
|
||||||
+verilator+prof+threads+start+<value> Set profile starting point
|
+verilator+prof+threads+start+<value> Set profile starting point
|
||||||
+verilator+prof+threads+window+<value> Set profile duration
|
+verilator+prof+threads+window+<value> Set profile duration
|
||||||
|
+verilator+prof+vlt+file+<filename> Set profile guided filename
|
||||||
+verilator+rand+reset+<value> Set random reset technique
|
+verilator+rand+reset+<value> Set random reset technique
|
||||||
+verilator+seed+<value> Set random seed
|
+verilator+seed+<value> Set random seed
|
||||||
+verilator+V Verbose version and config
|
+verilator+V Verbose version and config
|
||||||
|
@ -62,6 +62,12 @@ Summary:
|
|||||||
makes sense for a single-clock-domain module where it's typical to want
|
makes sense for a single-clock-domain module where it's typical to want
|
||||||
to capture one posedge eval() and one negedge eval().
|
to capture one posedge eval() and one negedge eval().
|
||||||
|
|
||||||
|
.. option:: +verilator+prof+vlt+file+<filename>
|
||||||
|
|
||||||
|
When a model was Verilated using :vlopt:`--prof-threads`, sets the
|
||||||
|
profile-guided optimization data runtime filename to dump to. Defaults
|
||||||
|
to :file:`profile.vlt`.
|
||||||
|
|
||||||
.. option:: +verilator+rand+reset+<value>
|
.. option:: +verilator+rand+reset+<value>
|
||||||
|
|
||||||
When a model was Verilated using :vlopt:`--x-initial unique
|
When a model was Verilated using :vlopt:`--x-initial unique
|
||||||
|
@ -833,7 +833,7 @@ Summary:
|
|||||||
.. option:: --prof-threads
|
.. option:: --prof-threads
|
||||||
|
|
||||||
Enable gantt chart data collection for threaded builds. See :ref:`Thread
|
Enable gantt chart data collection for threaded builds. See :ref:`Thread
|
||||||
Profiling`.
|
Profiling` and :ref:`Thread PGO`.
|
||||||
|
|
||||||
.. option:: --protect-key <key>
|
.. option:: --protect-key <key>
|
||||||
|
|
||||||
@ -1612,6 +1612,12 @@ The grammar of configuration commands is as follows:
|
|||||||
:option:`/*verilator&32;public_flat*/`, etc, metacomments. See
|
:option:`/*verilator&32;public_flat*/`, etc, metacomments. See
|
||||||
e.g. :ref:`VPI Example`.
|
e.g. :ref:`VPI Example`.
|
||||||
|
|
||||||
|
.. option:: profile_data -mtask "<mtask_hash>" -cost <cost_value>
|
||||||
|
|
||||||
|
Feeds profile-guided optimization data into the Verilator algorithms in
|
||||||
|
order to improve model runtime performance. This option is not expected
|
||||||
|
to be used by users directly. See :ref:`Thread PGO`.
|
||||||
|
|
||||||
.. option:: sc_bv -module "<modulename>" [-task "<taskname>"] -var "<signame>"
|
.. option:: sc_bv -module "<modulename>" [-task "<taskname>"] -var "<signame>"
|
||||||
|
|
||||||
.. option:: sc_bv -module "<modulename>" [-function "<funcname>"] -var "<signame>"
|
.. option:: sc_bv -module "<modulename>" [-function "<funcname>"] -var "<signame>"
|
||||||
|
@ -145,3 +145,23 @@ After running Make, the C++ compiler may produce the following:
|
|||||||
- Intermediate dependencies
|
- Intermediate dependencies
|
||||||
* - *{prefix}{misc}*\ .o
|
* - *{prefix}{misc}*\ .o
|
||||||
- Intermediate objects
|
- Intermediate objects
|
||||||
|
|
||||||
|
The Verilated executable may produce the following:
|
||||||
|
|
||||||
|
.. list-table::
|
||||||
|
|
||||||
|
* - coverage.dat
|
||||||
|
- Code coverage output, and default input filename for :command:`verilator_coverage`
|
||||||
|
* - gmon.out
|
||||||
|
- GCC/clang code profiler output, often fed into :command:`verilator_profcfunc`
|
||||||
|
* - profile.vlt
|
||||||
|
- --prof-threads data file for :ref:`Thread PGO`
|
||||||
|
* - profile_threads.dat
|
||||||
|
- --prof-threads data file for :command:`verilator_gantt`
|
||||||
|
|
||||||
|
Verilator_gantt may produce the following:
|
||||||
|
|
||||||
|
.. list-table::
|
||||||
|
|
||||||
|
* - profile_threads.vcd
|
||||||
|
- Gantt report waveform output
|
||||||
|
@ -26,7 +26,8 @@ risk of reset bugs in trade for performance; see the above documentation
|
|||||||
for these options.
|
for these options.
|
||||||
|
|
||||||
If using Verilated multithreaded, use ``numactl`` to ensure you are using
|
If using Verilated multithreaded, use ``numactl`` to ensure you are using
|
||||||
non-conflicting hardware resources. See :ref:`Multithreading`.
|
non-conflicting hardware resources. See :ref:`Multithreading`. Also
|
||||||
|
consider using profile-guided optimization, see :ref:`Thread PGO`.
|
||||||
|
|
||||||
Minor Verilog code changes can also give big wins. You should not have any
|
Minor Verilog code changes can also give big wins. You should not have any
|
||||||
UNOPTFLAT warnings from Verilator. Fixing these warnings can result in
|
UNOPTFLAT warnings from Verilator. Fixing these warnings can result in
|
||||||
@ -93,9 +94,7 @@ cases, for example regressions, it is usually worth spending extra
|
|||||||
compilation time to reduce total CPU time.
|
compilation time to reduce total CPU time.
|
||||||
|
|
||||||
If you will be running many simulations on a single model, you can
|
If you will be running many simulations on a single model, you can
|
||||||
investigate profile guided optimization. With GCC, using GCC's
|
investigate profile guided optimization. See :ref:`Compiler PGO`.
|
||||||
"-fprofile-arcs", then GCC's "-fbranch-probabilities" will yield another
|
|
||||||
15% or so.
|
|
||||||
|
|
||||||
Modern compilers also support link-time optimization (LTO), which can help
|
Modern compilers also support link-time optimization (LTO), which can help
|
||||||
especially if you link in DPI code. To enable LTO on GCC, pass "-flto" in
|
especially if you link in DPI code. To enable LTO on GCC, pass "-flto" in
|
||||||
@ -298,6 +297,9 @@ With the :vlopt:`--prof-threads` option, Verilator will:
|
|||||||
* Add code to save profiling data in non-human-friendly form to the file
|
* Add code to save profiling data in non-human-friendly form to the file
|
||||||
specified with :vlopt:`+verilator+prof+threads+file+\<filename\>`.
|
specified with :vlopt:`+verilator+prof+threads+file+\<filename\>`.
|
||||||
|
|
||||||
|
* Add code to save profiling data for thread profile-guided
|
||||||
|
optimization. See :ref:`Thread PGO`.
|
||||||
|
|
||||||
The :command:`verilator_gantt` program may then be run to transform the
|
The :command:`verilator_gantt` program may then be run to transform the
|
||||||
saved profiling file into a nicer visual format and produce some related
|
saved profiling file into a nicer visual format and produce some related
|
||||||
statistics.
|
statistics.
|
||||||
@ -314,6 +316,7 @@ statistics.
|
|||||||
|
|
||||||
For more information see :command:`verilator_gantt`.
|
For more information see :command:`verilator_gantt`.
|
||||||
|
|
||||||
|
|
||||||
.. _Profiling ccache efficiency:
|
.. _Profiling ccache efficiency:
|
||||||
|
|
||||||
Profiling ccache efficiency
|
Profiling ccache efficiency
|
||||||
@ -377,3 +380,120 @@ For example:
|
|||||||
os >> main_time;
|
os >> main_time;
|
||||||
os >> *topp;
|
os >> *topp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Profile-Guided Optimization
|
||||||
|
===========================
|
||||||
|
|
||||||
|
Profile-guided optimization is the technique where profiling data is
|
||||||
|
collected by running your simulation executable, then this information is
|
||||||
|
used to guide the next Verilation or compilation.
|
||||||
|
|
||||||
|
There are two forms of profile-guided optimizations. Unfortunately for
|
||||||
|
best results they must each be performed from the highest level code to the
|
||||||
|
lowest, which means performing them separately and in this order:
|
||||||
|
|
||||||
|
* :ref:`Thread PGO`
|
||||||
|
* :ref:`Compiler PGO`
|
||||||
|
|
||||||
|
Other forms of PGO may be supported in the future, such as clock and reset
|
||||||
|
toggle rate PGO, branch prediction PGO, statement execution time PGO, or
|
||||||
|
others as they prove beneficial.
|
||||||
|
|
||||||
|
|
||||||
|
.. _Thread PGO:
|
||||||
|
|
||||||
|
Thread Profile-Guided Optimization
|
||||||
|
----------------------------------
|
||||||
|
|
||||||
|
Verilator supports thread profile-guided optimization (Thread PGO) to
|
||||||
|
improve multithreaded performance.
|
||||||
|
|
||||||
|
When using multithreading, Verilator computes how long macro tasks take and
|
||||||
|
tries to balance those across threads. (What is a macro-task? See the
|
||||||
|
Verilator internals document (:file:`docs/internals.rst` in the
|
||||||
|
distribution).) If the estimations are incorrect, the threads will not be
|
||||||
|
balanced, leading to decreased performance. Thread PGO allows collecting
|
||||||
|
profiling data to replace the estimates and better optimize these
|
||||||
|
decisions.
|
||||||
|
|
||||||
|
To use Thread PGO, Verilate the model with the :vlopt:`--prof-threads`
|
||||||
|
option.
|
||||||
|
|
||||||
|
Run the model executable. When the executable exits, it will create a
|
||||||
|
profile.vlt file.
|
||||||
|
|
||||||
|
Rerun Verilator, optionally omitting the :vlopt:`--prof-threads` option,
|
||||||
|
and adding the profile.vlt generated earlier to the command line.
|
||||||
|
|
||||||
|
Note there is no Verilator equivalent to GCC's -fprofile-use. Verilator's
|
||||||
|
profile data file (profile.vlt) can be placed on the verilator command line
|
||||||
|
directly without any prefix.
|
||||||
|
|
||||||
|
If results from multiple simulations are to be used in generating the
|
||||||
|
optimization, multiple simulations' profile.vlt files may be concatenated
|
||||||
|
externally, or each of the files may be fed as separate command line
|
||||||
|
options into Verilator. Verilator will simply sum the profile results, so
|
||||||
|
a longer running test will have proportionally more weight for optimization
|
||||||
|
than a shorter running test.
|
||||||
|
|
||||||
|
If you provide any profile feedback data to Verilator, and it cannot use
|
||||||
|
it, it will issue the :option:`PROFOUTOFDATE` warning that threads were
|
||||||
|
scheduled using estimated costs. This usually indicates that the profile
|
||||||
|
data was generated from different Verilog source code than Verilator is
|
||||||
|
currently running against. Therefore, repeat the data collection phase to
|
||||||
|
create new profiling data, then rerun Verilator with the same input source
|
||||||
|
files and that new profiling data.
|
||||||
|
|
||||||
|
|
||||||
|
.. _Compiler PGO:
|
||||||
|
|
||||||
|
Compiler Profile-Guided Optimization
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
GCC and Clang support compiler profile-guided optimization (PGO). This
|
||||||
|
optimizes any C/C++ program including Verilated code. Using compiler PGO
|
||||||
|
typically yields improvements of 5-15% on both single-threaded and
|
||||||
|
multi-threaded models.
|
||||||
|
|
||||||
|
To use compiler PGO with GCC or Clang, please see the appropriate compiler
|
||||||
|
documentation. The process in GCC 10 was as follows:
|
||||||
|
|
||||||
|
1. Compile the Verilated model with the compiler's "-fprofile-generate"
|
||||||
|
flag:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
verilator [whatever_flags] --build \
|
||||||
|
-CFLAGS -fprofile-generate -LDFLAGS -fprofile-generate
|
||||||
|
|
||||||
|
or, if calling make yourself, add -fprofile-generate appropriately to your
|
||||||
|
Makefile.
|
||||||
|
|
||||||
|
2. Run your simulation. This will create \*.gcda file(s) in the same
|
||||||
|
directory as the source files.
|
||||||
|
|
||||||
|
3. Recompile the model with -fprofile-use. The compiler will read the
|
||||||
|
\*.gcda file(s).
|
||||||
|
|
||||||
|
For GCC:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
verilator [whatever_flags] --build \
|
||||||
|
-CFLAGS "-fprofile-use -fprofile-correction"
|
||||||
|
|
||||||
|
For Clang:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
llvm-profdata merge -output default.profdata *.profraw
|
||||||
|
verilator [whatever_flags] --build \
|
||||||
|
-CFLAGS "-fprofile-use -fprofile-correction"
|
||||||
|
|
||||||
|
or, if calling make yourself, add these CFLAGS switches appropriately to
|
||||||
|
your Makefile.
|
||||||
|
|
||||||
|
Clang and GCC also support -fauto-profile which uses sample-based
|
||||||
|
feedback-directed optimization. See the appropriate compiler
|
||||||
|
documentation.
|
||||||
|
@ -1011,6 +1011,22 @@ List Of Warnings
|
|||||||
a var/reg must be used as the target of procedural assignments.
|
a var/reg must be used as the target of procedural assignments.
|
||||||
|
|
||||||
|
|
||||||
|
.. option:: PROFOUTOFDATE
|
||||||
|
|
||||||
|
Warns that threads were scheduled using estimated costs, despite the
|
||||||
|
fact that data was provided from profile-guided optimization (see
|
||||||
|
:ref:`Thread PGO`) as fed into Verilator using the
|
||||||
|
:option:`profile_data` configuration file option. This usually
|
||||||
|
indicates that the profile data was generated from different Verilog
|
||||||
|
source code than Verilator is currently running against.
|
||||||
|
|
||||||
|
It is recommended to create new profiling data, then rerun Verilator
|
||||||
|
with the same input source files and that new profiling data.
|
||||||
|
|
||||||
|
Ignoring this warning may only slow simulations; it will still simulate
|
||||||
|
correctly.
|
||||||
|
|
||||||
|
|
||||||
.. option:: PROTECTED
|
.. option:: PROTECTED
|
||||||
|
|
||||||
Warning that a 'pragma protected' section was encountered. The code
|
Warning that a 'pragma protected' section was encountered. The code
|
||||||
|
@ -405,6 +405,9 @@ routines in the sources to rely more heavily on randomness, and
|
|||||||
generally try harder not to keep input nodes together when we have the
|
generally try harder not to keep input nodes together when we have the
|
||||||
option to scramble things.
|
option to scramble things.
|
||||||
|
|
||||||
|
Profile-guided optimization makes this a bit better, by adjusting mtask
|
||||||
|
scheduling, but this does not yet guide the packing into mtasks.
|
||||||
|
|
||||||
|
|
||||||
Performance Regression
|
Performance Regression
|
||||||
""""""""""""""""""""""
|
""""""""""""""""""""""
|
||||||
|
@ -2258,6 +2258,7 @@ VerilatedContext::VerilatedContext()
|
|||||||
Verilated::lastContextp(this);
|
Verilated::lastContextp(this);
|
||||||
Verilated::threadContextp(this);
|
Verilated::threadContextp(this);
|
||||||
m_ns.m_profThreadsFilename = "profile_threads.dat";
|
m_ns.m_profThreadsFilename = "profile_threads.dat";
|
||||||
|
m_ns.m_profVltFilename = "profile.vlt";
|
||||||
m_fdps.resize(31);
|
m_fdps.resize(31);
|
||||||
std::fill(m_fdps.begin(), m_fdps.end(), static_cast<FILE*>(nullptr));
|
std::fill(m_fdps.begin(), m_fdps.end(), static_cast<FILE*>(nullptr));
|
||||||
m_fdFreeMct.resize(30);
|
m_fdFreeMct.resize(30);
|
||||||
@ -2340,6 +2341,14 @@ std::string VerilatedContext::profThreadsFilename() const VL_MT_SAFE {
|
|||||||
const VerilatedLockGuard lock{m_mutex};
|
const VerilatedLockGuard lock{m_mutex};
|
||||||
return m_ns.m_profThreadsFilename;
|
return m_ns.m_profThreadsFilename;
|
||||||
}
|
}
|
||||||
|
// Set the filename used for the profile-guided optimization data dump.
// This is what the +verilator+prof+vlt+file+<filename> runtime argument calls.
// The store is done while holding m_mutex.
void VerilatedContext::profVltFilename(const std::string& flag) VL_MT_SAFE {
|
||||||
|
const VerilatedLockGuard lock{m_mutex};
|
||||||
|
m_ns.m_profVltFilename = flag;
|
||||||
|
}
|
||||||
|
// Return the filename used for the profile-guided optimization data dump.
// The value is read while holding m_mutex and returned by value (a copy).
std::string VerilatedContext::profVltFilename() const VL_MT_SAFE {
|
||||||
|
const VerilatedLockGuard lock{m_mutex};
|
||||||
|
return m_ns.m_profVltFilename;
|
||||||
|
}
|
||||||
void VerilatedContext::randReset(int val) VL_MT_SAFE {
|
void VerilatedContext::randReset(int val) VL_MT_SAFE {
|
||||||
const VerilatedLockGuard lock{m_mutex};
|
const VerilatedLockGuard lock{m_mutex};
|
||||||
m_s.m_randReset = val;
|
m_s.m_randReset = val;
|
||||||
@ -2495,6 +2504,8 @@ void VerilatedContextImp::commandArgVl(const std::string& arg) {
|
|||||||
profThreadsWindow(std::atol(value.c_str()));
|
profThreadsWindow(std::atol(value.c_str()));
|
||||||
} else if (commandArgVlValue(arg, "+verilator+prof+threads+file+", value /*ref*/)) {
|
} else if (commandArgVlValue(arg, "+verilator+prof+threads+file+", value /*ref*/)) {
|
||||||
profThreadsFilename(value);
|
profThreadsFilename(value);
|
||||||
|
} else if (commandArgVlValue(arg, "+verilator+prof+vlt+file+", value /*ref*/)) {
|
||||||
|
profVltFilename(value);
|
||||||
} else if (commandArgVlValue(arg, "+verilator+rand+reset+", value /*ref*/)) {
|
} else if (commandArgVlValue(arg, "+verilator+rand+reset+", value /*ref*/)) {
|
||||||
randReset(std::atoi(value.c_str()));
|
randReset(std::atoi(value.c_str()));
|
||||||
} else if (commandArgVlValue(arg, "+verilator+seed+", value /*ref*/)) {
|
} else if (commandArgVlValue(arg, "+verilator+seed+", value /*ref*/)) {
|
||||||
|
@ -346,6 +346,7 @@ protected:
|
|||||||
vluint32_t m_profThreadsWindow = 2; // +prof+threads window size
|
vluint32_t m_profThreadsWindow = 2; // +prof+threads window size
|
||||||
// Slow path
|
// Slow path
|
||||||
std::string m_profThreadsFilename; // +prof+threads filename
|
std::string m_profThreadsFilename; // +prof+threads filename
|
||||||
|
std::string m_profVltFilename; // +prof+vlt filename
|
||||||
} m_ns;
|
} m_ns;
|
||||||
|
|
||||||
mutable VerilatedMutex m_argMutex; // Protect m_argVec, m_argVecLoaded
|
mutable VerilatedMutex m_argMutex; // Protect m_argVec, m_argVecLoaded
|
||||||
@ -522,6 +523,8 @@ public: // But for internal use only
|
|||||||
vluint32_t profThreadsWindow() const VL_MT_SAFE { return m_ns.m_profThreadsWindow; }
|
vluint32_t profThreadsWindow() const VL_MT_SAFE { return m_ns.m_profThreadsWindow; }
|
||||||
void profThreadsFilename(const std::string& flag) VL_MT_SAFE;
|
void profThreadsFilename(const std::string& flag) VL_MT_SAFE;
|
||||||
std::string profThreadsFilename() const VL_MT_SAFE;
|
std::string profThreadsFilename() const VL_MT_SAFE;
|
||||||
|
void profVltFilename(const std::string& flag) VL_MT_SAFE;
|
||||||
|
std::string profVltFilename() const VL_MT_SAFE;
|
||||||
|
|
||||||
// Internal: Find scope
|
// Internal: Find scope
|
||||||
const VerilatedScope* scopeFind(const char* namep) const VL_MT_SAFE;
|
const VerilatedScope* scopeFind(const char* namep) const VL_MT_SAFE;
|
||||||
|
113
include/verilated_profiler.h
Normal file
113
include/verilated_profiler.h
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
||||||
|
//=============================================================================
|
||||||
|
//
|
||||||
|
// Code available from: https://verilator.org
|
||||||
|
//
|
||||||
|
// Copyright 2012-2021 by Wilson Snyder. This program is free software; you
|
||||||
|
// can redistribute it and/or modify it under the terms of either the GNU
|
||||||
|
// Lesser General Public License Version 3 or the Perl Artistic License
|
||||||
|
// Version 2.0.
|
||||||
|
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||||
|
//
|
||||||
|
//=============================================================================
|
||||||
|
///
|
||||||
|
/// \file
|
||||||
|
/// \brief Verilated general profiling header
|
||||||
|
///
|
||||||
|
/// This file is not part of the Verilated public-facing API.
|
||||||
|
/// It is only for internal use by Verilated library routines.
|
||||||
|
///
|
||||||
|
//=============================================================================
|
||||||
|
|
||||||
|
#ifndef VERILATOR_VERILATED_PROFILER_H_
|
||||||
|
#define VERILATOR_VERILATED_PROFILER_H_
|
||||||
|
|
||||||
|
#include "verilatedos.h"
|
||||||
|
#include "verilated.h" // for VerilatedMutex and clang annotations
|
||||||
|
|
||||||
|
// Profile record, private class used only by this header
// Pairs the (hashed) name of a profiled item with the number of the counter
// that holds its data.
class VerilatedProfilerRec final {
    std::string m_name;  // Hashed name of mtask/etc
    size_t m_counterNumber = 0;  // Which counter has data
public:
    // METHODS
    // Create a record stating that counter 'counterNumber' belongs to 'name'
    VerilatedProfilerRec(size_t counterNumber, const std::string& name)
        : m_name{name}
        , m_counterNumber{counterNumber} {}
    // Default record: empty name, counter number 0
    VerilatedProfilerRec() = default;
    // Number of the counter holding this record's data
    size_t counterNumber() const { return m_counterNumber; }
    // Name of the profiled item. Returned by const reference so that callers
    // that only read it do not pay for a string copy on every call; the
    // reference is valid for as long as this record exists.
    const std::string& name() const { return m_name; }
};
|
||||||
|
|
||||||
|
// Create some number of bucketed profilers
|
||||||
|
template <std::size_t T_Entries> class VerilatedProfiler final {
|
||||||
|
// Counters are stored packed, all together, versus in VerilatedProfilerRec to
|
||||||
|
// reduce cache effects
|
||||||
|
std::array<vluint64_t, T_Entries> m_counters{}; // Time spent on this record
|
||||||
|
std::deque<VerilatedProfilerRec> m_records; // Record information
|
||||||
|
|
||||||
|
public:
|
||||||
|
// METHODS
|
||||||
|
VerilatedProfiler() = default;
|
||||||
|
~VerilatedProfiler() = default;
|
||||||
|
void write(const char* modelp, const std::string& filename) VL_MT_SAFE;
|
||||||
|
void addCounter(size_t counter, const std::string& name) {
|
||||||
|
VL_DEBUG_IF(assert(counter < T_Entries););
|
||||||
|
m_records.emplace_back(VerilatedProfilerRec{counter, name});
|
||||||
|
}
|
||||||
|
void startCounter(size_t counter) {
|
||||||
|
vluint64_t val;
|
||||||
|
VL_RDTSC(val);
|
||||||
|
// -= so when we add end time in stopCounter, we already subtracted
|
||||||
|
// out, without needing to hold another temporary
|
||||||
|
m_counters[counter] -= val;
|
||||||
|
}
|
||||||
|
void stopCounter(size_t counter) {
|
||||||
|
vluint64_t val;
|
||||||
|
VL_RDTSC(val);
|
||||||
|
m_counters[counter] += val;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <std::size_t T_Entries>
|
||||||
|
void VerilatedProfiler<T_Entries>::write(const char* modelp,
|
||||||
|
const std::string& filename) VL_MT_SAFE {
|
||||||
|
static VerilatedMutex s_mutex;
|
||||||
|
const VerilatedLockGuard lock{s_mutex};
|
||||||
|
|
||||||
|
// On the first call we create the file. On later calls we append.
|
||||||
|
// So when we have multiple models in an executable, possibly even
|
||||||
|
// running on different threads, each will have a different symtab so
|
||||||
|
// each will collect is own data correctly. However when each is
|
||||||
|
// destroid we need to get all the data, not keep overwriting and only
|
||||||
|
// get the last model's data.
|
||||||
|
static bool s_firstCall = true;
|
||||||
|
|
||||||
|
VL_DEBUG_IF(VL_DBG_MSGF("+prof+vlt+file writing to '%s'\n", filename.c_str()););
|
||||||
|
|
||||||
|
FILE* fp = nullptr;
|
||||||
|
if (!s_firstCall) fp = std::fopen(filename.c_str(), "a");
|
||||||
|
if (VL_UNLIKELY(!fp))
|
||||||
|
fp = std::fopen(filename.c_str(), "w"); // firstCall, or doesn't exist yet
|
||||||
|
if (VL_UNLIKELY(!fp)) {
|
||||||
|
VL_FATAL_MT(filename.c_str(), 0, "", "+prof+vlt+file file not writable");
|
||||||
|
// cppcheck-suppress resourceLeak // bug, doesn't realize fp is nullptr
|
||||||
|
return; // LCOV_EXCL_LINE
|
||||||
|
}
|
||||||
|
s_firstCall = false;
|
||||||
|
|
||||||
|
// TODO Perhaps merge with verilated_coverage output format, so can
|
||||||
|
// have a common merging and reporting tool, etc.
|
||||||
|
fprintf(fp, "// Verilated model profile-guided optimization data dump file\n");
|
||||||
|
fprintf(fp, "`verilator_config\n");
|
||||||
|
|
||||||
|
for (const auto& it : m_records) {
|
||||||
|
const std::string& name = it.name();
|
||||||
|
fprintf(fp, "profile_data -model \"%s\" -mtask \"%s\" -cost 64'd%" VL_PRI64 "u\n", modelp,
|
||||||
|
name.c_str(), m_counters[it.counterNumber()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::fclose(fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -24,6 +24,7 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
//######################################################################
|
//######################################################################
|
||||||
// Resolve wildcards in files, modules, ftasks or variables
|
// Resolve wildcards in files, modules, ftasks or variables
|
||||||
@ -346,6 +347,9 @@ using V3ConfigFileResolver = V3ConfigWildcardResolver<V3ConfigFile>;
|
|||||||
class V3ConfigResolver final {
|
class V3ConfigResolver final {
|
||||||
V3ConfigModuleResolver m_modules; // Access to module names (with wildcards)
|
V3ConfigModuleResolver m_modules; // Access to module names (with wildcards)
|
||||||
V3ConfigFileResolver m_files; // Access to file names (with wildcards)
|
V3ConfigFileResolver m_files; // Access to file names (with wildcards)
|
||||||
|
std::unordered_map<string, std::unordered_map<string, vluint64_t>>
|
||||||
|
m_profileData; // Access to profile_data records
|
||||||
|
FileLine* m_profileFileLine = nullptr;
|
||||||
|
|
||||||
static V3ConfigResolver s_singleton; // Singleton (not via local static, as that's slow)
|
static V3ConfigResolver s_singleton; // Singleton (not via local static, as that's slow)
|
||||||
V3ConfigResolver() = default;
|
V3ConfigResolver() = default;
|
||||||
@ -356,6 +360,20 @@ public:
|
|||||||
|
|
||||||
V3ConfigModuleResolver& modules() { return m_modules; }
|
V3ConfigModuleResolver& modules() { return m_modules; }
|
||||||
V3ConfigFileResolver& files() { return m_files; }
|
V3ConfigFileResolver& files() { return m_files; }
|
||||||
|
|
||||||
|
// Record one profile_data entry for (model, key).
// Costs given for the same (model, key) are summed. The FileLine of the first
// entry ever added is remembered for getProfileDataFileLine().
void addProfileData(FileLine* fl, const string& model, const string& key, vluint64_t cost) {
|
||||||
|
if (!m_profileFileLine) m_profileFileLine = fl;
|
||||||
|
// getProfileData() returns 0 when nothing is stored, so never store a 0 cost
if (cost == 0) cost = 1; // Cost 0 means delete (or no data)
|
||||||
|
m_profileData[model][key] += cost;
|
||||||
|
}
|
||||||
|
// Look up the accumulated cost stored for (model, key).
// Returns 0 if either the model or the key has no entry.
vluint64_t getProfileData(const string& model, const string& key) const {
|
||||||
|
const auto mit = m_profileData.find(model);
|
||||||
|
if (mit == m_profileData.cend()) return 0;
|
||||||
|
const auto it = mit->second.find(key);
|
||||||
|
if (it == mit->second.cend()) return 0;
|
||||||
|
return it->second;
|
||||||
|
}
|
||||||
|
// FileLine passed with the first addProfileData() call; nullptr if none was added
FileLine* getProfileDataFileLine() const { return m_profileFileLine; } // Maybe null
|
||||||
};
|
};
|
||||||
|
|
||||||
V3ConfigResolver V3ConfigResolver::s_singleton;
|
V3ConfigResolver V3ConfigResolver::s_singleton;
|
||||||
@ -392,10 +410,6 @@ void V3Config::addIgnore(V3ErrorCode code, bool on, const string& filename, int
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void V3Config::addModulePragma(const string& module, AstPragmaType pragma) {
|
|
||||||
V3ConfigResolver::s().modules().at(module).addModulePragma(pragma);
|
|
||||||
}
|
|
||||||
|
|
||||||
void V3Config::addInline(FileLine* fl, const string& module, const string& ftask, bool on) {
|
void V3Config::addInline(FileLine* fl, const string& module, const string& ftask, bool on) {
|
||||||
if (ftask.empty()) {
|
if (ftask.empty()) {
|
||||||
V3ConfigResolver::s().modules().at(module).setInline(on);
|
V3ConfigResolver::s().modules().at(module).setInline(on);
|
||||||
@ -408,6 +422,15 @@ void V3Config::addInline(FileLine* fl, const string& module, const string& ftask
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Attach 'pragma' to the configuration entry for 'module' in the module resolver
void V3Config::addModulePragma(const string& module, AstPragmaType pragma) {
|
||||||
|
V3ConfigResolver::s().modules().at(module).addModulePragma(pragma);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Forward one profile_data record (model, key, cost) to the V3ConfigResolver singleton
void V3Config::addProfileData(FileLine* fl, const string& model, const string& key,
|
||||||
|
vluint64_t cost) {
|
||||||
|
V3ConfigResolver::s().addProfileData(fl, model, key, cost);
|
||||||
|
}
|
||||||
|
|
||||||
void V3Config::addVarAttr(FileLine* fl, const string& module, const string& ftask,
|
void V3Config::addVarAttr(FileLine* fl, const string& module, const string& ftask,
|
||||||
const string& var, AstAttrType attr, AstSenTree* sensep) {
|
const string& var, AstAttrType attr, AstSenTree* sensep) {
|
||||||
// Semantics: sensep only if public_flat_rw
|
// Semantics: sensep only if public_flat_rw
|
||||||
@ -497,6 +520,13 @@ void V3Config::applyVarAttr(AstNodeModule* modulep, AstNodeFTask* ftaskp, AstVar
|
|||||||
if (vp) vp->apply(varp);
|
if (vp) vp->apply(varp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return the cost the V3ConfigResolver singleton has stored for (model, key);
// the resolver returns 0 when it has no entry
vluint64_t V3Config::getProfileData(const string& model, const string& key) {
|
||||||
|
return V3ConfigResolver::s().getProfileData(model, key);
|
||||||
|
}
|
||||||
|
// Return the FileLine the V3ConfigResolver singleton remembered for profile data;
// may be nullptr
FileLine* V3Config::getProfileDataFileLine() {
|
||||||
|
return V3ConfigResolver::s().getProfileDataFileLine();
|
||||||
|
}
|
||||||
|
|
||||||
bool V3Config::waive(FileLine* filelinep, V3ErrorCode code, const string& message) {
|
bool V3Config::waive(FileLine* filelinep, V3ErrorCode code, const string& message) {
|
||||||
V3ConfigFile* filep = V3ConfigResolver::s().files().resolve(filelinep->filename());
|
V3ConfigFile* filep = V3ConfigResolver::s().files().resolve(filelinep->filename());
|
||||||
if (!filep) return false;
|
if (!filep) return false;
|
||||||
|
@ -33,17 +33,23 @@ public:
|
|||||||
static void addCoverageBlockOff(const string& file, int lineno);
|
static void addCoverageBlockOff(const string& file, int lineno);
|
||||||
static void addCoverageBlockOff(const string& module, const string& blockname);
|
static void addCoverageBlockOff(const string& module, const string& blockname);
|
||||||
static void addIgnore(V3ErrorCode code, bool on, const string& filename, int min, int max);
|
static void addIgnore(V3ErrorCode code, bool on, const string& filename, int min, int max);
|
||||||
static void addWaiver(V3ErrorCode code, const string& filename, const string& message);
|
|
||||||
static void addModulePragma(const string& module, AstPragmaType pragma);
|
|
||||||
static void addInline(FileLine* fl, const string& module, const string& ftask, bool on);
|
static void addInline(FileLine* fl, const string& module, const string& ftask, bool on);
|
||||||
|
static void addModulePragma(const string& module, AstPragmaType pragma);
|
||||||
|
static void addProfileData(FileLine* fl, const string& model, const string& key,
|
||||||
|
vluint64_t cost);
|
||||||
|
static void addWaiver(V3ErrorCode code, const string& filename, const string& message);
|
||||||
static void addVarAttr(FileLine* fl, const string& module, const string& ftask,
|
static void addVarAttr(FileLine* fl, const string& module, const string& ftask,
|
||||||
const string& signal, AstAttrType type, AstSenTree* nodep);
|
const string& signal, AstAttrType type, AstSenTree* nodep);
|
||||||
|
|
||||||
static void applyCase(AstCase* nodep);
|
static void applyCase(AstCase* nodep);
|
||||||
static void applyCoverageBlock(AstNodeModule* modulep, AstBegin* nodep);
|
static void applyCoverageBlock(AstNodeModule* modulep, AstBegin* nodep);
|
||||||
static void applyIgnores(FileLine* filelinep);
|
static void applyIgnores(FileLine* filelinep);
|
||||||
static void applyModule(AstNodeModule* modulep);
|
static void applyModule(AstNodeModule* modulep);
|
||||||
static void applyFTask(AstNodeModule* modulep, AstNodeFTask* ftaskp);
|
static void applyFTask(AstNodeModule* modulep, AstNodeFTask* ftaskp);
|
||||||
static void applyVarAttr(AstNodeModule* modulep, AstNodeFTask* ftaskp, AstVar* varp);
|
static void applyVarAttr(AstNodeModule* modulep, AstNodeFTask* ftaskp, AstVar* varp);
|
||||||
|
|
||||||
|
static uint64_t getProfileData(const string& model, const string& key);
|
||||||
|
static FileLine* getProfileDataFileLine();
|
||||||
static bool waive(FileLine* filelinep, V3ErrorCode code, const string& message);
|
static bool waive(FileLine* filelinep, V3ErrorCode code, const string& message);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
#include "V3EmitC.h"
|
#include "V3EmitC.h"
|
||||||
#include "V3EmitCBase.h"
|
#include "V3EmitCBase.h"
|
||||||
#include "V3LanguageWords.h"
|
#include "V3LanguageWords.h"
|
||||||
|
#include "V3PartitionGraph.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <map>
|
#include <map>
|
||||||
@ -394,6 +395,7 @@ void EmitCSyms::emitSymHdr() {
|
|||||||
if (v3Global.needTraceDumper()) {
|
if (v3Global.needTraceDumper()) {
|
||||||
puts("#include \"" + v3Global.opt.traceSourceLang() + ".h\"\n");
|
puts("#include \"" + v3Global.opt.traceSourceLang() + ".h\"\n");
|
||||||
}
|
}
|
||||||
|
if (v3Global.opt.profThreads()) puts("#include \"verilated_profiler.h\"\n");
|
||||||
|
|
||||||
puts("\n// INCLUDE MODEL CLASS\n");
|
puts("\n// INCLUDE MODEL CLASS\n");
|
||||||
puts("\n#include \"" + topClassName() + ".h\"\n");
|
puts("\n#include \"" + topClassName() + ".h\"\n");
|
||||||
@ -475,6 +477,21 @@ void EmitCSyms::emitSymHdr() {
|
|||||||
puts("];\n");
|
puts("];\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (v3Global.opt.profThreads()) {
|
||||||
|
puts("\n// PROFILING\n");
|
||||||
|
vluint64_t maxProfilerId = 0;
|
||||||
|
if (v3Global.opt.mtasks()) {
|
||||||
|
for (const V3GraphVertex* vxp
|
||||||
|
= v3Global.rootp()->execGraphp()->depGraphp()->verticesBeginp();
|
||||||
|
vxp; vxp = vxp->verticesNextp()) {
|
||||||
|
ExecMTask* mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
|
||||||
|
if (maxProfilerId < mtp->profilerId()) maxProfilerId = mtp->profilerId();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
++maxProfilerId; // As size must include 0
|
||||||
|
puts("VerilatedProfiler<" + cvtToStr(maxProfilerId) + "> _vm_profiler;\n");
|
||||||
|
}
|
||||||
|
|
||||||
if (!m_scopeNames.empty()) { // Scope names
|
if (!m_scopeNames.empty()) { // Scope names
|
||||||
puts("\n// SCOPE NAMES\n");
|
puts("\n// SCOPE NAMES\n");
|
||||||
for (const auto& itr : m_scopeNames) {
|
for (const auto& itr : m_scopeNames) {
|
||||||
@ -654,6 +671,7 @@ void EmitCSyms::emitSymImp() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
puts("// FUNCTIONS\n");
|
puts("// FUNCTIONS\n");
|
||||||
|
|
||||||
// Destructor
|
// Destructor
|
||||||
puts(symClassName() + "::~" + symClassName() + "()\n");
|
puts(symClassName() + "::~" + symClassName() + "()\n");
|
||||||
puts("{\n");
|
puts("{\n");
|
||||||
@ -663,7 +681,11 @@ void EmitCSyms::emitSymImp() {
|
|||||||
puts("if (__Vm_dumping) _traceDumpClose();\n");
|
puts("if (__Vm_dumping) _traceDumpClose();\n");
|
||||||
puts("#endif // VM_TRACE\n");
|
puts("#endif // VM_TRACE\n");
|
||||||
}
|
}
|
||||||
if (v3Global.opt.mtasks()) { puts("delete __Vm_threadPoolp;\n"); }
|
if (v3Global.opt.profThreads()) {
|
||||||
|
puts("_vm_profiler.write(\"" + topClassName()
|
||||||
|
+ "\", _vm_contextp__->profVltFilename());\n");
|
||||||
|
}
|
||||||
|
if (v3Global.opt.mtasks()) puts("delete __Vm_threadPoolp;\n");
|
||||||
puts("}\n\n");
|
puts("}\n\n");
|
||||||
|
|
||||||
// Constructor
|
// Constructor
|
||||||
@ -718,6 +740,19 @@ void EmitCSyms::emitSymImp() {
|
|||||||
}
|
}
|
||||||
puts("{\n");
|
puts("{\n");
|
||||||
|
|
||||||
|
if (v3Global.opt.profThreads()) {
|
||||||
|
puts("// Configure profiling\n");
|
||||||
|
if (v3Global.opt.mtasks()) {
|
||||||
|
for (const V3GraphVertex* vxp
|
||||||
|
= v3Global.rootp()->execGraphp()->depGraphp()->verticesBeginp();
|
||||||
|
vxp; vxp = vxp->verticesNextp()) {
|
||||||
|
ExecMTask* mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
|
||||||
|
puts("_vm_profiler.addCounter(" + cvtToStr(mtp->profilerId()) + ", \""
|
||||||
|
+ mtp->hashName() + "\");\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
puts("// Configure time unit / time precision\n");
|
puts("// Configure time unit / time precision\n");
|
||||||
if (!v3Global.rootp()->timeunit().isNone()) {
|
if (!v3Global.rootp()->timeunit().isNone()) {
|
||||||
puts("_vm_contextp__->timeunit(");
|
puts("_vm_contextp__->timeunit(");
|
||||||
|
@ -110,6 +110,7 @@ public:
|
|||||||
PINNOTFOUND, // instance port name not found in its module
|
PINNOTFOUND, // instance port name not found in its module
|
||||||
PKGNODECL, // Error: Package/class needs to be predeclared
|
PKGNODECL, // Error: Package/class needs to be predeclared
|
||||||
PROCASSWIRE, // Procedural assignment on wire
|
PROCASSWIRE, // Procedural assignment on wire
|
||||||
|
PROFOUTOFDATE, // Profile data out of date
|
||||||
PROTECTED, // detected `pragma protected
|
PROTECTED, // detected `pragma protected
|
||||||
RANDC, // Unsupported: 'randc' converted to 'rand'
|
RANDC, // Unsupported: 'randc' converted to 'rand'
|
||||||
REALCVT, // Real conversion
|
REALCVT, // Real conversion
|
||||||
@ -173,7 +174,7 @@ public:
|
|||||||
"LATCH", "LITENDIAN", "MODDUP",
|
"LATCH", "LITENDIAN", "MODDUP",
|
||||||
"MULTIDRIVEN", "MULTITOP","NOLATCH", "NULLPORT", "PINCONNECTEMPTY",
|
"MULTIDRIVEN", "MULTITOP","NOLATCH", "NULLPORT", "PINCONNECTEMPTY",
|
||||||
"PINMISSING", "PINNOCONNECT", "PINNOTFOUND", "PKGNODECL", "PROCASSWIRE",
|
"PINMISSING", "PINNOCONNECT", "PINNOTFOUND", "PKGNODECL", "PROCASSWIRE",
|
||||||
"PROTECTED", "RANDC", "REALCVT", "REDEFMACRO",
|
"PROFOUTOFDATE", "PROTECTED", "RANDC", "REALCVT", "REDEFMACRO",
|
||||||
"SELRANGE", "SHORTREAL", "SPLITVAR", "STMTDLY", "SYMRSVDWORD", "SYNCASYNCNET",
|
"SELRANGE", "SHORTREAL", "SPLITVAR", "STMTDLY", "SYMRSVDWORD", "SYNCASYNCNET",
|
||||||
"TICKCOUNT", "TIMESCALEMOD",
|
"TICKCOUNT", "TIMESCALEMOD",
|
||||||
"UNDRIVEN", "UNOPT", "UNOPTFLAT", "UNOPTTHREADS",
|
"UNDRIVEN", "UNOPT", "UNOPTFLAT", "UNOPTTHREADS",
|
||||||
|
@ -455,6 +455,9 @@ private:
|
|||||||
iterateNull(nodep->ftaskp());
|
iterateNull(nodep->ftaskp());
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
virtual void visit(AstMTaskBody* nodep) override {
|
||||||
|
m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, [=]() {});
|
||||||
|
}
|
||||||
virtual void visit(AstNodeProcedure* nodep) override {
|
virtual void visit(AstNodeProcedure* nodep) override {
|
||||||
m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, [=]() {});
|
m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, [=]() {});
|
||||||
}
|
}
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
#include "verilatedos.h"
|
#include "verilatedos.h"
|
||||||
|
|
||||||
#include "V3EmitCBase.h"
|
#include "V3EmitCBase.h"
|
||||||
|
#include "V3Config.h"
|
||||||
#include "V3Os.h"
|
#include "V3Os.h"
|
||||||
#include "V3File.h"
|
#include "V3File.h"
|
||||||
#include "V3GraphAlg.h"
|
#include "V3GraphAlg.h"
|
||||||
@ -27,6 +28,7 @@
|
|||||||
#include "V3PartitionGraph.h"
|
#include "V3PartitionGraph.h"
|
||||||
#include "V3Scoreboard.h"
|
#include "V3Scoreboard.h"
|
||||||
#include "V3Stats.h"
|
#include "V3Stats.h"
|
||||||
|
#include "V3UniqueNames.h"
|
||||||
|
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
@ -2615,15 +2617,152 @@ void V3Partition::go(V3Graph* mtasksp) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Accumulate 'cost' into the running total kept for 'id' in 'cmap'.
// An id not yet present is value-initialized to 0 by operator[] before the addition.
void add(std::unordered_map<int, vluint64_t>& cmap, int id, vluint64_t cost) { cmap[id] += cost; }
|
||||||
|
|
||||||
|
using EstimateAndProfiled = std::pair<uint64_t, vluint64_t>; // cost est, cost profiled
|
||||||
|
using Costs = std::unordered_map<uint32_t, EstimateAndProfiled>;
|
||||||
|
|
||||||
|
// Put estimated and profiled mtask costs onto one comparable scale, in place.
//
// Each entry of 'costs' maps an mtask id to {estimated cost, profiled cost};
// a profiled cost of 0 is treated as "no profile data".
//  1. All estimates are multiplied by sum(profiled) / sum(estimate), where
//     both sums cover only the entries that have profile data.
//  2. If the largest estimate or profiled value then exceeds 10000000, every
//     value is multiplied down so that the largest lands on that limit.
// Scaling truncates toward zero, except a non-zero value never becomes zero.
static void normalizeCosts(Costs& costs) {
    // Multiply a cost by a factor; a non-zero cost is kept at 1 or above
    const auto rescale = [](vluint64_t cost, double factor) {
        const double product = static_cast<double>(cost) * factor;
        const double clamped = (cost && product < 1) ? 1 : product;
        return static_cast<uint64_t>(clamped);
    };

    // Totals over only those entries that have both an estimate and a profile
    vluint64_t profiledTotal = 0;
    vluint64_t estimateTotal = 0;
    for (const auto& entry : costs) {
        const EstimateAndProfiled& pair = entry.second;
        if (!pair.second) continue;  // No profile for this mtask
        estimateTotal += pair.first;
        profiledTotal += pair.second;
    }

    if (estimateTotal) {
        // Bring the estimates onto the same relative scale as the profiled
        // data, so mtasks lacking a profile stay comparable with the rest.
        // (Improves results if only a few profiles are missing.)
        const double estToProfile
            = static_cast<double>(profiledTotal) / static_cast<double>(estimateTotal);
        UINFO(5, "Estimated data needs scaling by "
                     << estToProfile << ", sumCostProfiled=" << profiledTotal
                     << " sumCostEstimate=" << estimateTotal << endl);
        for (auto& entry : costs) entry.second.first = rescale(entry.second.first, estToProfile);
    }

    // Costs can overflow a uint32; find the largest value so all can be scaled down
    vluint64_t largest = 0;
    for (const auto& entry : costs) {
        const EstimateAndProfiled& pair = entry.second;
        if (largest < pair.first) largest = pair.first;
        if (largest < pair.second) largest = pair.second;
        UINFO(9, "Post uint scale: ce = " << pair.first << " cp=" << pair.second << endl);
    }

    const vluint64_t limit = 10000000;  // Extra room for future algorithms to add costs
    if (largest <= limit) return;  // Already small enough

    const double scaleup = static_cast<double>(limit) / static_cast<double>(largest);
    UINFO(5, "Scaling data to within 32-bits by multiply by=" << scaleup << ", maxCost="
                                                                << largest << endl);
    for (auto& entry : costs) {
        EstimateAndProfiled& pair = entry.second;
        pair.first = rescale(pair.first, scaleup);
        pair.second = rescale(pair.second, scaleup);
    }
}
|
||||||
|
|
||||||
|
void V3Partition::selfTestNormalizeCosts() {
|
||||||
|
{ // Test that omitted profile data correctly scales estimates
|
||||||
|
Costs costs({// id est prof
|
||||||
|
{1, {10, 1000}},
|
||||||
|
{2, {20, 0}}, // Note no profile
|
||||||
|
{3, {30, 3000}}});
|
||||||
|
normalizeCosts(costs);
|
||||||
|
UASSERT_SELFTEST(uint64_t, costs[1].first, 1000);
|
||||||
|
UASSERT_SELFTEST(uint64_t, costs[1].second, 1000);
|
||||||
|
UASSERT_SELFTEST(uint64_t, costs[2].first, 2000);
|
||||||
|
UASSERT_SELFTEST(uint64_t, costs[2].second, 0);
|
||||||
|
UASSERT_SELFTEST(uint64_t, costs[3].first, 3000);
|
||||||
|
UASSERT_SELFTEST(uint64_t, costs[3].second, 3000);
|
||||||
|
}
|
||||||
|
{ // Test that very large profile data properly scales
|
||||||
|
Costs costs({// id est prof
|
||||||
|
{1, {10, 100000000000}},
|
||||||
|
{2, {20, 200000000000}},
|
||||||
|
{3, {30, 1}}}); // Make sure doesn't underflow
|
||||||
|
normalizeCosts(costs);
|
||||||
|
UASSERT_SELFTEST(uint64_t, costs[1].first, 2500000);
|
||||||
|
UASSERT_SELFTEST(uint64_t, costs[1].second, 5000000);
|
||||||
|
UASSERT_SELFTEST(uint64_t, costs[2].first, 5000000);
|
||||||
|
UASSERT_SELFTEST(uint64_t, costs[2].second, 10000000);
|
||||||
|
UASSERT_SELFTEST(uint64_t, costs[3].first, 7500000);
|
||||||
|
UASSERT_SELFTEST(uint64_t, costs[3].second, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Give every ExecMTask in the graph a cost and an initial priority.
//
// For each mtask this sets its hashName (the key used to look up profile
// data), then takes the V3InstrCount estimate and any profiled cost that
// V3Config returns for that key. After normalizeCosts() has put both on one
// scale, the profiled cost is used when non-zero, else the estimate.
//
// If any mtask with a non-zero estimate had no profiled cost, a PROFOUTOFDATE
// warning is issued, provided V3Config::getProfileDataFileLine() returns a
// FileLine to report it against.
static void fillinCosts(V3Graph* execMTaskGraphp) {
    V3UniqueNames uniqueNames;  // For generating unique mtask profile hash names

    // Pass 1: See what profiling data applies
    Costs costs;  // For each mtask, costs

    for (const V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp;
         vxp = vxp->verticesNextp()) {
        ExecMTask* const mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
        // Compute name of mtask, for hash lookup
        mtp->hashName(uniqueNames.get(mtp->bodyp()));

        // This estimate is 64 bits, but the final mtask graph algorithm needs 32 bits
        const vluint64_t costEstimate = V3InstrCount::count(mtp->bodyp(), false);
        const vluint64_t costProfiled
            = V3Config::getProfileData(v3Global.opt.prefix(), mtp->hashName());
        if (costProfiled) {
            UINFO(5, "Profile data for mtask " << mtp->id() << " " << mtp->hashName()
                                               << " cost override " << costProfiled << endl);
        }
        costs[mtp->id()] = std::make_pair(costEstimate, costProfiled);
    }

    normalizeCosts(costs /*ref*/);

    // Pass 2: Apply the normalized costs to the mtasks
    int totalEstimates = 0;
    int missingProfiles = 0;
    for (const V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp;
         vxp = vxp->verticesNextp()) {
        ExecMTask* const mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
        const EstimateAndProfiled& cost = costs[mtp->id()];
        // Keep both values at 64 bits until after the range checks; narrowing
        // first would wrap an oversized value and let it slip past the asserts
        const uint64_t costEstimate = cost.first;
        const uint64_t costProfiled = cost.second;
        UINFO(9, "ce = " << costEstimate << " cp=" << costProfiled << endl);
        UASSERT(costEstimate <= (1UL << 31), "cost scaling math would overflow uint32");
        UASSERT(costProfiled <= (1UL << 31), "cost scaling math would overflow uint32");
        const uint32_t costEstimate32 = static_cast<uint32_t>(costEstimate);
        const uint32_t costProfiled32 = static_cast<uint32_t>(costProfiled);
        uint32_t costToUse = costProfiled32;
        if (!costProfiled32) {
            // No profile for this mtask; fall back to the scaled estimate
            costToUse = costEstimate32;
            if (costEstimate32 != 0) ++missingProfiles;
        }
        if (costEstimate32 != 0) ++totalEstimates;
        mtp->cost(costToUse);
        mtp->priority(costToUse);
    }

    if (missingProfiles) {
        if (FileLine* fl = V3Config::getProfileDataFileLine()) {
            fl->v3warn(PROFOUTOFDATE, "Profile data for mtasks may be out of date. "
                                          << missingProfiles << " of " << totalEstimates
                                          << " mtasks had no data");
        }
    }
}
|
||||||
|
|
||||||
static void finalizeCosts(V3Graph* execMTaskGraphp) {
|
static void finalizeCosts(V3Graph* execMTaskGraphp) {
|
||||||
GraphStreamUnordered ser(execMTaskGraphp, GraphWay::REVERSE);
|
GraphStreamUnordered ser(execMTaskGraphp, GraphWay::REVERSE);
|
||||||
|
|
||||||
while (const V3GraphVertex* vxp = ser.nextp()) {
|
while (const V3GraphVertex* vxp = ser.nextp()) {
|
||||||
ExecMTask* mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
|
ExecMTask* mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
|
||||||
uint32_t costCount = V3InstrCount::count(mtp->bodyp(), false);
|
|
||||||
mtp->cost(costCount);
|
|
||||||
mtp->priority(costCount);
|
|
||||||
|
|
||||||
// "Priority" is the critical path from the start of the mtask, to
|
// "Priority" is the critical path from the start of the mtask, to
|
||||||
// the end of the graph reachable from this mtask. Given the
|
// the end of the graph reachable from this mtask. Given the
|
||||||
// choice among several ready mtasks, we'll want to start the
|
// choice among several ready mtasks, we'll want to start the
|
||||||
@ -2662,6 +2801,14 @@ static void finalizeCosts(V3Graph* execMTaskGraphp) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Assign profiler IDs
|
||||||
|
vluint64_t profilerId = 0;
|
||||||
|
for (const V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp;
|
||||||
|
vxp = vxp->verticesNextp()) {
|
||||||
|
ExecMTask* mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
|
||||||
|
mtp->profilerId(profilerId++);
|
||||||
|
}
|
||||||
|
|
||||||
// Removing tasks may cause edges that were formerly non-transitive to
|
// Removing tasks may cause edges that were formerly non-transitive to
|
||||||
// become transitive. Also we just created new edges around the removed
|
// become transitive. Also we just created new edges around the removed
|
||||||
// tasks, which could be transitive. Prune out all transitive edges.
|
// tasks, which could be transitive. Prune out all transitive edges.
|
||||||
@ -2718,6 +2865,11 @@ static void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t th
|
|||||||
" " + cvtToStr(mtaskp->cost()) + ");\n" + //
|
" " + cvtToStr(mtaskp->cost()) + ");\n" + //
|
||||||
"}\n");
|
"}\n");
|
||||||
}
|
}
|
||||||
|
if (v3Global.opt.profThreads()) {
|
||||||
|
// No lock around startCounter, as counter numbers are unique per thread
|
||||||
|
addStrStmt("vlSymsp->_vm_profiler.startCounter(" + cvtToStr(mtaskp->profilerId())
|
||||||
|
+ ");\n");
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
addStrStmt("Verilated::mtaskId(" + cvtToStr(mtaskp->id()) + ");\n");
|
addStrStmt("Verilated::mtaskId(" + cvtToStr(mtaskp->id()) + ");\n");
|
||||||
@ -2725,6 +2877,10 @@ static void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t th
|
|||||||
// Move the actual body of calls to leaf functions into this function
|
// Move the actual body of calls to leaf functions into this function
|
||||||
funcp->addStmtsp(mtaskp->bodyp()->unlinkFrBack());
|
funcp->addStmtsp(mtaskp->bodyp()->unlinkFrBack());
|
||||||
|
|
||||||
|
if (v3Global.opt.profThreads()) {
|
||||||
|
// No lock around stopCounter, as counter numbers are unique per thread
|
||||||
|
addStrStmt("vlSymsp->_vm_profiler.stopCounter(" + cvtToStr(mtaskp->profilerId()) + ");\n");
|
||||||
|
}
|
||||||
if (v3Global.opt.profThreads()) {
|
if (v3Global.opt.profThreads()) {
|
||||||
addStrStmt("if (VL_UNLIKELY(" + recName + ")) " //
|
addStrStmt("if (VL_UNLIKELY(" + recName + ")) " //
|
||||||
+ recName + "->endRecord(VL_RDTSC_Q());\n");
|
+ recName + "->endRecord(VL_RDTSC_Q());\n");
|
||||||
@ -2851,9 +3007,10 @@ void V3Partition::finalize() {
|
|||||||
// V3LifePost) that can change the cost of logic within each mtask.
|
// V3LifePost) that can change the cost of logic within each mtask.
|
||||||
// Now that logic is final, recompute the cost and priority of each
|
// Now that logic is final, recompute the cost and priority of each
|
||||||
// ExecMTask.
|
// ExecMTask.
|
||||||
|
fillinCosts(execGraphp->mutableDepGraphp());
|
||||||
finalizeCosts(execGraphp->mutableDepGraphp());
|
finalizeCosts(execGraphp->mutableDepGraphp());
|
||||||
|
|
||||||
// Replace the graph body with it's multi-threaded implementation.
|
// Replace the graph body with its multi-threaded implementation.
|
||||||
implementExecGraph(execGraphp);
|
implementExecGraph(execGraphp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -50,6 +50,7 @@ public:
|
|||||||
void go(V3Graph* mtasksp);
|
void go(V3Graph* mtasksp);
|
||||||
|
|
||||||
static void selfTest();
|
static void selfTest();
|
||||||
|
static void selfTestNormalizeCosts();
|
||||||
|
|
||||||
// Print out a hash of the shape of graphp. Only needed to debug the
|
// Print out a hash of the shape of graphp. Only needed to debug the
|
||||||
// origin of some nondeterminism; otherwise this is pretty useless.
|
// origin of some nondeterminism; otherwise this is pretty useless.
|
||||||
|
@ -56,12 +56,14 @@ class ExecMTask final : public AbstractMTask {
|
|||||||
private:
|
private:
|
||||||
AstMTaskBody* const m_bodyp; // Task body
|
AstMTaskBody* const m_bodyp; // Task body
|
||||||
const uint32_t m_id; // Unique id of this mtask.
|
const uint32_t m_id; // Unique id of this mtask.
|
||||||
|
string m_hashName; // Hashed name for profile-driven optimization
|
||||||
uint32_t m_priority = 0; // Predicted critical path from the start of
|
uint32_t m_priority = 0; // Predicted critical path from the start of
|
||||||
// this mtask to the ends of the graph that are reachable from this
|
// this mtask to the ends of the graph that are reachable from this
|
||||||
// mtask. In abstract time units.
|
// mtask. In abstract time units.
|
||||||
uint32_t m_cost = 0; // Predicted runtime of this mtask, in the same
|
uint32_t m_cost = 0; // Predicted runtime of this mtask, in the same
|
||||||
// abstract time units as priority().
|
// abstract time units as priority().
|
||||||
uint64_t m_predictStart = 0; // Predicted start time of task
|
uint64_t m_predictStart = 0; // Predicted start time of task
|
||||||
|
uint64_t m_profilerId = 0; // VerilatedCounter number for profiling
|
||||||
VL_UNCOPYABLE(ExecMTask);
|
VL_UNCOPYABLE(ExecMTask);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -77,11 +79,15 @@ public:
|
|||||||
void cost(uint32_t cost) { m_cost = cost; }
|
void cost(uint32_t cost) { m_cost = cost; }
|
||||||
void predictStart(vluint64_t time) { m_predictStart = time; }
|
void predictStart(vluint64_t time) { m_predictStart = time; }
|
||||||
vluint64_t predictStart() const { return m_predictStart; }
|
vluint64_t predictStart() const { return m_predictStart; }
|
||||||
|
void profilerId(vluint64_t id) { m_profilerId = id; }
|
||||||
|
vluint64_t profilerId() const { return m_profilerId; }
|
||||||
string cFuncName() const {
|
string cFuncName() const {
|
||||||
// If this MTask maps to a C function, this should be the name
|
// If this MTask maps to a C function, this should be the name
|
||||||
return string("__Vmtask") + "__" + cvtToStr(m_id);
|
return string("__Vmtask") + "__" + cvtToStr(m_id);
|
||||||
}
|
}
|
||||||
virtual string name() const override { return string("mt") + cvtToStr(id()); }
|
virtual string name() const override { return string("mt") + cvtToStr(id()); }
|
||||||
|
string hashName() const { return m_hashName; }
|
||||||
|
void hashName(const string& name) { m_hashName = name; }
|
||||||
void dump(std::ostream& str) const {
|
void dump(std::ostream& str) const {
|
||||||
str << name() << "." << cvtToHex(this);
|
str << name() << "." << cvtToHex(this);
|
||||||
if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]";
|
if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]";
|
||||||
|
@ -590,6 +590,7 @@ static void verilate(const string& argString) {
|
|||||||
V3TSP::selfTest();
|
V3TSP::selfTest();
|
||||||
V3ScoreboardBase::selfTest();
|
V3ScoreboardBase::selfTest();
|
||||||
V3Partition::selfTest();
|
V3Partition::selfTest();
|
||||||
|
V3Partition::selfTestNormalizeCosts();
|
||||||
V3Broken::selfTest();
|
V3Broken::selfTest();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -121,6 +121,7 @@ vnum {vnum1}|{vnum2}|{vnum3}|{vnum4}|{vnum5}
|
|||||||
"no_clocker" { FL; return yVLT_NO_CLOCKER; }
|
"no_clocker" { FL; return yVLT_NO_CLOCKER; }
|
||||||
"no_inline" { FL; return yVLT_NO_INLINE; }
|
"no_inline" { FL; return yVLT_NO_INLINE; }
|
||||||
"parallel_case" { FL; return yVLT_PARALLEL_CASE; }
|
"parallel_case" { FL; return yVLT_PARALLEL_CASE; }
|
||||||
|
"profile_data" { FL; return yVLT_PROFILE_DATA; }
|
||||||
"public" { FL; return yVLT_PUBLIC; }
|
"public" { FL; return yVLT_PUBLIC; }
|
||||||
"public_flat" { FL; return yVLT_PUBLIC_FLAT; }
|
"public_flat" { FL; return yVLT_PUBLIC_FLAT; }
|
||||||
"public_flat_rd" { FL; return yVLT_PUBLIC_FLAT_RD; }
|
"public_flat_rd" { FL; return yVLT_PUBLIC_FLAT_RD; }
|
||||||
@ -133,12 +134,15 @@ vnum {vnum1}|{vnum2}|{vnum3}|{vnum4}|{vnum5}
|
|||||||
"tracing_on" { FL; return yVLT_TRACING_ON; }
|
"tracing_on" { FL; return yVLT_TRACING_ON; }
|
||||||
|
|
||||||
-?"-block" { FL; return yVLT_D_BLOCK; }
|
-?"-block" { FL; return yVLT_D_BLOCK; }
|
||||||
|
-?"-cost" { FL; return yVLT_D_COST; }
|
||||||
-?"-file" { FL; return yVLT_D_FILE; }
|
-?"-file" { FL; return yVLT_D_FILE; }
|
||||||
-?"-function" { FL; return yVLT_D_FUNCTION; }
|
-?"-function" { FL; return yVLT_D_FUNCTION; }
|
||||||
-?"-lines" { FL; return yVLT_D_LINES; }
|
-?"-lines" { FL; return yVLT_D_LINES; }
|
||||||
-?"-match" { FL; return yVLT_D_MATCH; }
|
-?"-match" { FL; return yVLT_D_MATCH; }
|
||||||
|
-?"-model" { FL; return yVLT_D_MODEL; }
|
||||||
-?"-module" { FL; return yVLT_D_MODULE; }
|
-?"-module" { FL; return yVLT_D_MODULE; }
|
||||||
-?"-msg" { FL; return yVLT_D_MSG; }
|
-?"-msg" { FL; return yVLT_D_MSG; }
|
||||||
|
-?"-mtask" { FL; return yVLT_D_MTASK; }
|
||||||
-?"-rule" { FL; return yVLT_D_RULE; }
|
-?"-rule" { FL; return yVLT_D_RULE; }
|
||||||
-?"-task" { FL; return yVLT_D_TASK; }
|
-?"-task" { FL; return yVLT_D_TASK; }
|
||||||
-?"-var" { FL; return yVLT_D_VAR; }
|
-?"-var" { FL; return yVLT_D_VAR; }
|
||||||
|
@ -363,6 +363,7 @@ BISONPRE_VERSION(3.7,%define api.header.include {"V3ParseBison.h"})
|
|||||||
%token<fl> yVLT_NO_CLOCKER "no_clocker"
|
%token<fl> yVLT_NO_CLOCKER "no_clocker"
|
||||||
%token<fl> yVLT_NO_INLINE "no_inline"
|
%token<fl> yVLT_NO_INLINE "no_inline"
|
||||||
%token<fl> yVLT_PARALLEL_CASE "parallel_case"
|
%token<fl> yVLT_PARALLEL_CASE "parallel_case"
|
||||||
|
%token<fl> yVLT_PROFILE_DATA "profile_data"
|
||||||
%token<fl> yVLT_PUBLIC "public"
|
%token<fl> yVLT_PUBLIC "public"
|
||||||
%token<fl> yVLT_PUBLIC_FLAT "public_flat"
|
%token<fl> yVLT_PUBLIC_FLAT "public_flat"
|
||||||
%token<fl> yVLT_PUBLIC_FLAT_RD "public_flat_rd"
|
%token<fl> yVLT_PUBLIC_FLAT_RD "public_flat_rd"
|
||||||
@ -375,12 +376,15 @@ BISONPRE_VERSION(3.7,%define api.header.include {"V3ParseBison.h"})
|
|||||||
%token<fl> yVLT_TRACING_ON "tracing_on"
|
%token<fl> yVLT_TRACING_ON "tracing_on"
|
||||||
|
|
||||||
%token<fl> yVLT_D_BLOCK "--block"
|
%token<fl> yVLT_D_BLOCK "--block"
|
||||||
|
%token<fl> yVLT_D_COST "--cost"
|
||||||
%token<fl> yVLT_D_FILE "--file"
|
%token<fl> yVLT_D_FILE "--file"
|
||||||
%token<fl> yVLT_D_FUNCTION "--function"
|
%token<fl> yVLT_D_FUNCTION "--function"
|
||||||
%token<fl> yVLT_D_LINES "--lines"
|
%token<fl> yVLT_D_LINES "--lines"
|
||||||
%token<fl> yVLT_D_MODULE "--module"
|
|
||||||
%token<fl> yVLT_D_MATCH "--match"
|
%token<fl> yVLT_D_MATCH "--match"
|
||||||
|
%token<fl> yVLT_D_MODEL "--model"
|
||||||
|
%token<fl> yVLT_D_MODULE "--module"
|
||||||
%token<fl> yVLT_D_MSG "--msg"
|
%token<fl> yVLT_D_MSG "--msg"
|
||||||
|
%token<fl> yVLT_D_MTASK "--mtask"
|
||||||
%token<fl> yVLT_D_RULE "--rule"
|
%token<fl> yVLT_D_RULE "--rule"
|
||||||
%token<fl> yVLT_D_TASK "--task"
|
%token<fl> yVLT_D_TASK "--task"
|
||||||
%token<fl> yVLT_D_VAR "--var"
|
%token<fl> yVLT_D_VAR "--var"
|
||||||
@ -6404,6 +6408,8 @@ vltItem:
|
|||||||
{ V3Config::addCaseParallel(*$3, 0); }
|
{ V3Config::addCaseParallel(*$3, 0); }
|
||||||
| yVLT_PARALLEL_CASE yVLT_D_FILE yaSTRING yVLT_D_LINES yaINTNUM
|
| yVLT_PARALLEL_CASE yVLT_D_FILE yaSTRING yVLT_D_LINES yaINTNUM
|
||||||
{ V3Config::addCaseParallel(*$3, $5->toUInt()); }
|
{ V3Config::addCaseParallel(*$3, $5->toUInt()); }
|
||||||
|
| yVLT_PROFILE_DATA yVLT_D_MODEL yaSTRING yVLT_D_MTASK yaSTRING yVLT_D_COST yaINTNUM
|
||||||
|
{ V3Config::addProfileData($<fl>1, *$3, *$5, $7->toUQuad()); }
|
||||||
;
|
;
|
||||||
|
|
||||||
vltOffFront<errcodeen>:
|
vltOffFront<errcodeen>:
|
||||||
|
@ -27,6 +27,7 @@ execute(
|
|||||||
all_run_flags => ["+verilator+prof+threads+start+2",
|
all_run_flags => ["+verilator+prof+threads+start+2",
|
||||||
" +verilator+prof+threads+window+2",
|
" +verilator+prof+threads+window+2",
|
||||||
" +verilator+prof+threads+file+$Self->{obj_dir}/profile_threads.dat",
|
" +verilator+prof+threads+file+$Self->{obj_dir}/profile_threads.dat",
|
||||||
|
" +verilator+prof+vlt+file+$Self->{obj_dir}/profile.vlt",
|
||||||
],
|
],
|
||||||
check_finished => 1,
|
check_finished => 1,
|
||||||
);
|
);
|
||||||
|
@ -121,6 +121,7 @@ execute(
|
|||||||
all_run_flags => ["+verilator+prof+threads+start+100",
|
all_run_flags => ["+verilator+prof+threads+start+100",
|
||||||
" +verilator+prof+threads+window+2",
|
" +verilator+prof+threads+window+2",
|
||||||
" +verilator+prof+threads+file+$Self->{obj_dir}/profile_threads.dat",
|
" +verilator+prof+threads+file+$Self->{obj_dir}/profile_threads.dat",
|
||||||
|
" +verilator+prof+vlt+file+$Self->{obj_dir}/profile.vlt",
|
||||||
],
|
],
|
||||||
check_finished => 1,
|
check_finished => 1,
|
||||||
);
|
);
|
||||||
|
6
test_regress/t/t_pgo_profoutofdate_bad.out
Normal file
6
test_regress/t/t_pgo_profoutofdate_bad.out
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
%Warning-PROFOUTOFDATE: t/t_pgo_profoutofdate_bad.v:27:1: Profile data for mtasks may be out of date. 3 of 3 mtasks had no data
|
||||||
|
27 | profile_data -model "x" -mtask "h7baded98__0" -cost 64'd12345678901234567890
|
||||||
|
| ^~~~~~~~~~~~
|
||||||
|
... For warning description see https://verilator.org/warn/PROFOUTOFDATE?v=latest
|
||||||
|
... Use "/* verilator lint_off PROFOUTOFDATE */" and lint_on around source to disable this message.
|
||||||
|
%Error: Exiting due to
|
20
test_regress/t/t_pgo_profoutofdate_bad.pl
Executable file
20
test_regress/t/t_pgo_profoutofdate_bad.pl
Executable file
@ -0,0 +1,20 @@
|
|||||||
|
#!/usr/bin/env perl
|
||||||
|
if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
|
||||||
|
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||||
|
#
|
||||||
|
# Copyright 2003 by Wilson Snyder. This program is free software; you
|
||||||
|
# can redistribute it and/or modify it under the terms of either the GNU
|
||||||
|
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||||
|
# Version 2.0.
|
||||||
|
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||||
|
|
||||||
|
scenarios(vltmt => 1);
|
||||||
|
|
||||||
|
compile(
|
||||||
|
v_flags2 => ["--threads 2"],
|
||||||
|
fails => 1,
|
||||||
|
expect_filename => $Self->{golden_filename},
|
||||||
|
);
|
||||||
|
|
||||||
|
ok(1);
|
||||||
|
1;
|
28
test_regress/t/t_pgo_profoutofdate_bad.v
Executable file
28
test_regress/t/t_pgo_profoutofdate_bad.v
Executable file
@ -0,0 +1,28 @@
|
|||||||
|
// DESCRIPTION: Verilator: Verilog Test module
|
||||||
|
//
|
||||||
|
// This file ONLY is placed under the Creative Commons Public Domain, for
|
||||||
|
// any use, without warranty, 2021 by Wilson Snyder.
|
||||||
|
// SPDX-License-Identifier: CC0-1.0
|
||||||
|
|
||||||
|
module t(/*AUTOARG*/
|
||||||
|
// Inputs
|
||||||
|
clk
|
||||||
|
);
|
||||||
|
input clk;
|
||||||
|
|
||||||
|
integer cyc=0;
|
||||||
|
|
||||||
|
// Test loop
|
||||||
|
always @ (posedge clk) begin
|
||||||
|
cyc <= cyc + 1;
|
||||||
|
if (cyc == 99) begin
|
||||||
|
$write("*-* All Finished *-*\n");
|
||||||
|
$finish;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
endmodule
|
||||||
|
|
||||||
|
`verilator_config
|
||||||
|
profile_data -model "x" -mtask "h7baded98__0" -cost 64'd12345678901234567890
|
||||||
|
profile_data -model "x" -mtask "hb56134bd__0" -cost 945
|
42
test_regress/t/t_pgo_threads.pl
Executable file
42
test_regress/t/t_pgo_threads.pl
Executable file
@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env perl
|
||||||
|
if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
|
||||||
|
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||||
|
#
|
||||||
|
# Copyright 2003 by Wilson Snyder. This program is free software; you
|
||||||
|
# can redistribute it and/or modify it under the terms of either the GNU
|
||||||
|
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||||
|
# Version 2.0.
|
||||||
|
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||||
|
|
||||||
|
scenarios(vltmt => 1);
|
||||||
|
|
||||||
|
# It doesn't really matter which test source is used here
|
||||||
|
top_filename("t/t_gen_alw.v");
|
||||||
|
|
||||||
|
compile(
|
||||||
|
v_flags2 => ["--prof-threads --threads 2"]
|
||||||
|
);
|
||||||
|
|
||||||
|
execute(
|
||||||
|
all_run_flags => ["+verilator+prof+threads+start+0",
|
||||||
|
" +verilator+prof+threads+file+/dev/null",
|
||||||
|
" +verilator+prof+vlt+file+$Self->{obj_dir}/profile.vlt",
|
||||||
|
],
|
||||||
|
check_finished => 1,
|
||||||
|
);
|
||||||
|
|
||||||
|
file_grep("$Self->{obj_dir}/profile.vlt", qr/profile_data/i);
|
||||||
|
|
||||||
|
compile(
|
||||||
|
# Intentionally no --prof-threads here, so we make sure profile data
|
||||||
|
# can be read in without it (that is, --prof-threads has no effect on profile_data hash names)
|
||||||
|
v_flags2 => ["--threads 2",
|
||||||
|
" $Self->{obj_dir}/profile.vlt"],
|
||||||
|
);
|
||||||
|
|
||||||
|
execute(
|
||||||
|
check_finished => 1,
|
||||||
|
);
|
||||||
|
|
||||||
|
ok(1);
|
||||||
|
1;
|
@ -21,10 +21,15 @@ compile(
|
|||||||
? "--threads 2 $root/include/verilated_threads.cpp" : ""),
|
? "--threads 2 $root/include/verilated_threads.cpp" : ""),
|
||||||
($Self->cfg_with_threaded
|
($Self->cfg_with_threaded
|
||||||
? "--trace-threads 1" : ""),
|
? "--trace-threads 1" : ""),
|
||||||
|
($Self->cfg_with_threaded
|
||||||
|
? "--prof-threads" : ""),
|
||||||
"$root/include/verilated_save.cpp"],
|
"$root/include/verilated_save.cpp"],
|
||||||
);
|
);
|
||||||
|
|
||||||
execute(
|
execute(
|
||||||
|
all_run_flags => [" +verilator+prof+threads+file+/dev/null",
|
||||||
|
" +verilator+prof+vlt+file+/dev/null",
|
||||||
|
],
|
||||||
check_finished => 1,
|
check_finished => 1,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -21,6 +21,9 @@ compile(
|
|||||||
);
|
);
|
||||||
|
|
||||||
execute(
|
execute(
|
||||||
|
all_run_flags => [" +verilator+prof+threads+file+/dev/null",
|
||||||
|
" +verilator+prof+vlt+file+/dev/null",
|
||||||
|
],
|
||||||
check_finished => 1,
|
check_finished => 1,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user