Merge branch 'master' into develop-v5

This commit is contained in:
Wilson Snyder 2022-07-04 13:20:03 -04:00
commit b25b798dbe
19 changed files with 276 additions and 221 deletions

View File

@ -284,33 +284,34 @@ detailed descriptions of these arguments.
--bbox-unsup Blackbox unsupported language features
--bin <filename> Override Verilator binary
--build Build model executable/library after Verilation
-CFLAGS <flags> C++ compiler arguments for makefile
--cc Create C++ output
--cdc Clock domain crossing analysis
-CFLAGS <flags> C++ compiler arguments for makefile
--clk <signal-name> Mark specified signal as clock
--make <build-tool> Generate scripts for specified build tool
--no-clk <signal-name> Prevent marking specified signal as clock
--compiler <compiler-name> Tune for specified C++ compiler
--converge-limit <loops> Tune convergence settle time
--coverage Enable all coverage
--coverage-line Enable line coverage
--coverage-max-width <width> Maximum array depth for coverage
--coverage-toggle Enable toggle coverage
--coverage-user Enable SVL user coverage
--coverage-underscore Enable coverage of _signals
--coverage-user Enable SVL user coverage
-D<var>[=<value>] Set preprocessor define
--debug Enable debugging
--debug-check Enable debugging assertions
--no-debug-leak Disable leaking memory in --debug mode
--debugi <level> Enable debugging at a specified level
--debugi-<srcfile> <level> Enable debugging a source file at a level
--no-decoration Disable comments and symbol decorations
--default-language <lang> Default language to parse
+define+<var>=<value> Set preprocessor define
--dpi-hdr-only Only produce the DPI header file
--dump-defines Show preprocessor defines with -E
--dump-tree Enable dumping .tree files
--dump-tree-addrids Use short identifiers instead of addresses
--dump-treei <level> Enable dumping .tree files at a level
--dump-treei-<srcfile> <level> Enable dumping .tree file at a source file at a level
--dump-tree-addrids Use short identifiers instead of addresses
-E Preprocess, but do not compile
--error-limit <value> Abort after this number of errors
--exe Link to create executable
@ -321,6 +322,7 @@ detailed descriptions of these arguments.
--flatten Force inlining of all modules, tasks and functions
-fno-<optimization> Disable internal optimization stage
-G<name>=<value> Overwrite top-level parameter
--gate-stmts <value> Tune gate optimizer depth
--gdb Run Verilator under GDB interactively
--gdbbt Run Verilator under GDB for backtrace
--generate-key Create random key for --protect-key
@ -328,53 +330,51 @@ detailed descriptions of these arguments.
--help Display this help
--hierarchical Enable hierarchical Verilation
-I<dir> Directory to search for includes
-j <jobs> Parallelism for --build
--gate-stmts <value> Tune gate optimizer depth
--if-depth <value> Tune IFDEPTH warning
+incdir+<dir> Directory to search for includes
--inline-mult <value> Tune module inlining
--instr-count-dpi <value> Assumed dynamic instruction count of DPI imports
-LDFLAGS <flags> Linker pre-object arguments for makefile
-j <jobs> Parallelism for --build
--l2-name <value> Verilog scope name of the top module
--language <lang> Default language standard to parse
-LDFLAGS <flags> Linker pre-object arguments for makefile
--lib-create <name> Create a DPI library
+libext+<ext>+[ext]... Extensions for finding modules
--lint-only Lint, but do not make output
--make <build-tool> Generate scripts for specified build tool
-MAKEFLAGS <flags> Arguments to pass to make during --build
--max-num-width <value> Maximum number width (default: 64K)
--MMD Create .d dependency files
--MP Create phony dependency targets
--Mdir <directory> Name of output object directory
--MMD Create .d dependency files
--mod-prefix <topname> Name to prepend to lower classes
--no-clk <signal-name> Prevent marking specified signal as clock
--no-decoration Disable comments and symbol decorations
--no-pins64 Don't use uint64_t's for 33-64 bit sigs
--no-skip-identical Disable skipping identical output
--MP Create phony dependency targets
+notimingchecks Ignored
-O0 Disable optimizations
-O3 High performance optimizations
-O<optimization-letter> Selectable optimizations
-o <executable> Name of final executable
--no-order-clock-delay Disable ordering clock enable assignments
--no-verilate Skip verilation and just compile previously Verilated code.
--output-split <statements> Split .cpp files into pieces
--output-split-cfuncs <statements> Split model functions
--output-split-ctrace <statements> Split tracing functions
-P Disable line numbers and blanks with -E
--pins-bv <bits> Specify types for top level ports
--pins-sc-uint Specify types for top level ports
--pins-sc-biguint Specify types for top level ports
--pins-sc-uint Specify types for top level ports
--pins-uint8 Specify types for top level ports
--no-pins64 Don't use uint64_t's for 33-64 bit sigs
--pipe-filter <command> Filter all input through a script
--pp-comments Show preprocessor comments with -E
--prefix <topname> Name of top level class
--private Debugging; see docs
--prof-c Compile C++ code with profiling
--prof-cfuncs Name functions for profiling
--prof-exec Enable generating execution profile for gantt chart
--prof-pgo Enable generating profiling data for PGO
--protect-key <key> Key for symbol protection
--protect-ids Hash identifier names for obscurity
--protect-key <key> Key for symbol protection
--protect-lib <name> Create a DPI protected library
--private Debugging; see docs
--public Debugging; see docs
--public-flat-rw Mark all variables, etc as public_flat_rw
-pvalue+<name>=<value> Overwrite toplevel parameter
@ -385,6 +385,7 @@ detailed descriptions of these arguments.
--rr Run Verilator and record with rr
--savable Enable model save-restore
--sc Create SystemC output
--no-skip-identical Disable skipping identical output
--stats Create statistics file
--stats-vars Provide statistics on variables
-sv Enable SystemVerilog parsing
@ -412,6 +413,7 @@ detailed descriptions of these arguments.
--unused-regexp <regexp> Tune UNUSED lint signals
-V Verbose version and config
-v <filename> Verilog library
--no-verilate Skip verilation and just compile previously Verilated code.
+verilog1995ext+<ext> Synonym for +1364-1995ext+<ext>
+verilog2001ext+<ext> Synonym for +1364-2001ext+<ext>
--version Displays program version and exits
@ -426,6 +428,9 @@ detailed descriptions of these arguments.
-Wno-lint Disable all lint warnings
-Wno-style Disable all style warnings
-Wpedantic Warn on compliance-test issues
-Wwarn-<message> Enable specified warning message
-Wwarn-lint Enable lint warning message
-Wwarn-style Enable style warning message
--x-assign <mode> Assign non-initial Xs to this value
--x-initial <mode> Assign initial Xs to this value
--x-initial-edge Enable initial X->0 and X->1 edge triggers

View File

@ -355,7 +355,7 @@ AC_SUBST(CFG_CXXFLAGS_PROFILE)
#_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=gnu++20)
#_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=c++20)
case "$(which lsb_release 2>&1 > /dev/null && lsb_release -d)" in
*Ubuntu*22.04*)
*Arch*Linux* | *Ubuntu*22.04*)
_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=gnu++17)
_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=c++17)
;;

View File

@ -68,6 +68,7 @@ Lukasz Dalek
Maarten De Braekeleer
Maciej Sobkowski
Marco Widmer
Mariusz Glebocki
Markus Krause
Marlon James
Marshal Qiao

View File

@ -129,16 +129,6 @@ Summary:
is also used). Verilator manages the build itself, and for this --build
requires GNU Make to be available on the platform.
.. option:: -CFLAGS <flags>
Add specified C compiler argument to the generated makefiles. For
multiple flags either pass them as a single argument with space
separators quoted in the shell (:command:`-CFLAGS "-a -b"`), or use
multiple -CFLAGS options (:command:`-CFLAGS -a -CFLAGS -b`).
When make is run on the generated makefile these will be passed to the
C++ compiler (g++/clang++/msvc++).
.. option:: --cc
Specifies C++ without SystemC output mode; see also :vlopt:`--sc`
@ -156,6 +146,16 @@ Summary:
have interest in adding more traditional CDC checks, please contact the
authors.
.. option:: -CFLAGS <flags>
Add specified C compiler argument to the generated makefiles. For
multiple flags either pass them as a single argument with space
separators quoted in the shell (:command:`-CFLAGS "-a -b"`), or use
multiple -CFLAGS options (:command:`-CFLAGS -a -CFLAGS -b`).
When make is run on the generated makefile these will be passed to the
C++ compiler (g++/clang++/msvc++).
.. option:: --clk <signal-name>
With :vlopt:`--clk`, the specified signal is marked as a clock signal.
@ -176,6 +176,11 @@ Summary:
clock, and remove it from the combinatorial logic reevaluation checking
code. This may greatly improve performance.
.. option:: --no-clk <signal-name>
Prevent the specified signal from being marked as clock. See
:vlopt:`--clk`.
.. option:: --compiler <compiler-name>
Enables workarounds for the specified C++ compiler (list below).
@ -285,6 +290,13 @@ Summary:
<--debugi>`). Higher levels produce more detailed messages. See
:vlopt:`--debug` for other implications of enabling debug.
.. option:: --no-decoration
When creating output Verilated code, minimize comments, white space,
symbol names and other decorative items, at the cost of greatly reduced
readability. This may assist C++ compile times. This will not typically
change the ultimate model's performance, but may in some cases.
.. option:: --default-language <value>
Select the language to be used by default when first processing each
@ -582,21 +594,6 @@ Summary:
to limit the number of parallel build jobs but attempt to execute all
independent build steps in parallel.
.. option:: -LDFLAGS <flags>
Add specified C linker arguments to the generated makefiles. For multiple
flags either pass them as a single argument with space separators quoted
in the shell (``-LDFLAGS "-a -b"``), or use multiple -LDFLAGS arguments
(``-LDFLAGS -a -LDFLAGS -b``).
When make is run on the generated makefile these will be passed to the
C++ linker (ld) **after** the primary file being linked. This flag is
called :vlopt:`-LDFLAGS` as that's the traditional name in simulators;
it would have been better called LDLIBS as that's the Makefile
variable it controls. (In Make, LDFLAGS is before the first object,
LDLIBS after. -L libraries need to be in the Make variable LDLIBS, not
LDFLAGS.)
.. option:: --l2-name <value>
Instead of using the module name when showing Verilog scope, use the
@ -613,12 +610,20 @@ Summary:
A synonym for :vlopt:`--default-language`, for compatibility with other
tools and earlier versions of Verilator.
.. option:: +libext+<ext>[+<ext>][...]
.. option:: -LDFLAGS <flags>
Specify the extensions that should be used for finding modules. If for
example module "my" is referenced, look in :file:`my.<ext>`. Note
"+libext+" is fairly standard across Verilog tools. Defaults to
".v+.sv".
Add specified C linker arguments to the generated makefiles. For multiple
flags either pass them as a single argument with space separators quoted
in the shell (``-LDFLAGS "-a -b"``), or use multiple -LDFLAGS arguments
(``-LDFLAGS -a -LDFLAGS -b``).
When make is run on the generated makefile these will be passed to the
C++ linker (ld) **after** the primary file being linked. This flag is
called :vlopt:`-LDFLAGS` as that's the traditional name in simulators;
it would have been better called LDLIBS as that's the Makefile
variable it controls. (In Make, LDFLAGS is before the first object,
LDLIBS after. -L libraries need to be in the Make variable LDLIBS, not
LDFLAGS.)
.. option:: --lib-create <name>
@ -637,6 +642,13 @@ Summary:
See also :vlopt:`--protect-lib`.
.. option:: +libext+<ext>[+<ext>][...]
Specify the extensions that should be used for finding modules. If for
example module "my" is referenced, look in :file:`my.<ext>`. Note
"+libext+" is fairly standard across Verilog tools. Defaults to
".v+.sv".
.. option:: --lint-only
Check the files for lint violations only, do not create any other
@ -675,17 +687,6 @@ Summary:
Set the maximum number literal width (e.g. in 1024'd22 this is the
1024). Defaults to 64K.
.. option:: --MMD =item --no-MMD
Enable/disable creation of .d dependency files, used for make dependency
detection, similar to gcc -MMD option. By default this option is
enabled for :vlopt:`--cc` or :vlopt:`--sc` modes.
.. option:: --MP
When creating .d dependency files with :vlopt:`--MMD` option, make phony
targets. Similar to :command:`gcc -MP` option.
.. option:: --Mdir <directory>
Specifies the name of the Make object directory. All generated files
@ -693,33 +694,23 @@ Summary:
The directory is created if it does not exist and the parent directories
exist; otherwise manually create the Mdir before calling Verilator.
.. option:: --MMD
.. option:: --no-MMD
Enable/disable creation of .d dependency files, used for make dependency
detection, similar to gcc -MMD option. By default this option is
enabled for :vlopt:`--cc` or :vlopt:`--sc` modes.
.. option:: --mod-prefix <topname>
Specifies the name to prepend to all lower level classes. Defaults to
the same as :vlopt:`--prefix`.
.. option:: --no-clk <signal-name>
.. option:: --MP
Prevent the specified signal from being marked as clock. See
:vlopt:`--clk`.
.. option:: --no-decoration
When creating output Verilated code, minimize comments, white space,
symbol names and other decorative items, at the cost of greatly reduced
readability. This may assist C++ compile times. This will not typically
change the ultimate model's performance, but may in some cases.
.. option:: --no-pins64
Backward compatible alias for :vlopt:`--pins-bv 33 <--pins-bv>`.
.. option:: --no-skip-identical =item --skip-identical
Rarely needed. Disables or enables skipping execution of Verilator if
all source files are identical, and all output files exist with newer
dates. By default this option is enabled for :vlopt:`--cc` or
:vlopt:`--sc` modes only.
When creating .d dependency files with :vlopt:`--MMD` option, make phony
targets. Similar to :command:`gcc -MP` option.
.. option:: +notimingchecks
@ -802,11 +793,6 @@ Summary:
With :vlopt:`-E`, disable generation of :code:`&96;line` markers and
blank lines, similar to :command:`gcc -P`.
.. option:: --pins64
Backward compatible alias for :vlopt:`--pins-bv 65 <--pins-bv>`. Note
that's a 65, not a 64.
.. option:: --pins-bv <width>
Specifies SystemC inputs/outputs of greater than or equal to <width>
@ -839,6 +825,15 @@ Summary:
of uint32_t. Likewise pins of width 9-16 will use uint16_t instead of
uint32_t.
.. option:: --pins64
Backward compatible alias for :vlopt:`--pins-bv 65 <--pins-bv>`. Note
that's a 65, not a 64.
.. option:: --no-pins64
Backward compatible alias for :vlopt:`--pins-bv 33 <--pins-bv>`.
.. option:: --pipe-filter <command>
Rarely needed. Verilator will spawn the specified command as a
@ -868,6 +863,11 @@ Summary:
prepended to the name of the :vlopt:`--top` option, or V prepended to
the first Verilog filename passed on the command line.
.. option:: --private
Opposite of :vlopt:`--public`. Is the default; this option exists for
backwards compatibility.
.. option:: --prof-c
When compiling the C++ code, enable the compiler's profiling flag
@ -901,23 +901,6 @@ Summary:
Deprecated. Same as --prof-exec and --prof-pgo together.
.. option:: --protect-key <key>
Specifies the private key for :vlopt:`--protect-ids`. For best security
this key should be 16 or more random bytes, a reasonably secure choice
is the output of :command:`verilator --generate-key`. Typically, a key
would be created by the user once for a given protected design library,
then every Verilator run for subsequent versions of that library would
be passed the same :vlopt:`--protect-key`. Thus, if the input Verilog is
similar between library versions (Verilator runs), the Verilated code
will likewise be mostly similar.
If :vlopt:`--protect-key` is not specified and a key is needed,
Verilator will generate a new key for every Verilator run. As the key is
not saved, this is best for security, but means every Verilator run will
give vastly different output even for identical input, perhaps harming
compile times (and certainly thrashing any "ccache").
.. option:: --protect-ids
Hash any private identifiers (variable, module, and assertion block
@ -938,6 +921,23 @@ Summary:
prototypes. Use of the VPI is not recommended as many design details
may be exposed, and an INSECURE warning will be issued.
.. option:: --protect-key <key>
Specifies the private key for :vlopt:`--protect-ids`. For best security
this key should be 16 or more random bytes, a reasonably secure choice
is the output of :command:`verilator --generate-key`. Typically, a key
would be created by the user once for a given protected design library,
then every Verilator run for subsequent versions of that library would
be passed the same :vlopt:`--protect-key`. Thus, if the input Verilog is
similar between library versions (Verilator runs), the Verilated code
will likewise be mostly similar.
If :vlopt:`--protect-key` is not specified and a key is needed,
Verilator will generate a new key for every Verilator run. As the key is
not saved, this is best for security, but means every Verilator run will
give vastly different output even for identical input, perhaps harming
compile times (and certainly thrashing any "ccache").
.. option:: --protect-lib <name>
Produces a DPI library similar to :vlopt:`--lib-create`, but hides
@ -949,11 +949,6 @@ Summary:
in the distribution for a demonstration of how to build and use the DPI
library.
.. option:: --private
Opposite of :vlopt:`--public`. Is the default; this option exists for
backwards compatibility.
.. option:: --public
This is only for historical debug use. Using it may result in
@ -1046,6 +1041,15 @@ Summary:
Specifies SystemC output mode; see also :vlopt:`--cc` option.
.. option:: --skip-identical
.. option:: --no-skip-identical
Rarely needed. Disables or enables skipping execution of Verilator if
all source files are identical, and all output files exist with newer
dates. By default this option is enabled for :vlopt:`--cc` or
:vlopt:`--sc` modes only.
.. option:: --stats
Creates a dump file with statistics on the design in

View File

@ -187,7 +187,7 @@ VM_SLOW += $(VM_CLASSES_SLOW) $(VM_SUPPORT_SLOW)
VK_FAST_OBJS = $(addsuffix .o, $(VM_FAST))
VK_SLOW_OBJS = $(addsuffix .o, $(VM_SLOW))
VK_USER_OBJS = $(addsuffix .o, $(VM_USER_CLASSES))
VK_USER_OBJS = $(addsuffix .o, $(VM_USER_CLASSES))
# Note VM_GLOBAL_FAST and VM_GLOBAL_SLOW holds the files required from the
# run-time library. In practice everything is actually in VM_GLOBAL_FAST,

View File

@ -99,6 +99,10 @@ void VlExecutionProfiler::configure(const VerilatedContext& context) {
}
}
// Static hook whose signature matches VlStartWorkerCb: forwards to the given
// profiler's setupThread() for the worker thread identified by threadId.
// NOTE(review): profilep is dereferenced without a null check - confirm that this
// callback is only ever registered together with a non-null profiler.
void VlExecutionProfiler::startWorkerSetup(VlExecutionProfiler* profilep, uint32_t threadId) {
profilep->setupThread(threadId);
}
void VlExecutionProfiler::setupThread(uint32_t threadId) {
// Reserve some space in the thread-local profiling buffer, in order to try to avoid malloc
// while profiling.

View File

@ -23,11 +23,6 @@
#define VERILATOR_VERILATED_PROFILER_H_
#include "verilatedos.h"
#ifndef VL_PROFILER
#error "verilated_profiler.h/cpp expects VL_PROFILER (from --prof-{exec, pgo}"
#endif
#include "verilated.h"
#include <array>
@ -186,6 +181,9 @@ public:
void clear() VL_MT_SAFE_EXCLUDES(m_mutex);
// Write profiling data into file
void dump(const char* filenamep, uint64_t tickEnd) VL_MT_SAFE_EXCLUDES(m_mutex);
// Called via VlStartWorkerCb in VlWorkerThread::startWorker
static void startWorkerSetup(VlExecutionProfiler* profilep, uint32_t threadId);
};
//=============================================================================

View File

@ -38,13 +38,13 @@
# include <unistd.h>
#endif
#ifndef O_LARGEFILE // For example on WIN32
#ifndef O_LARGEFILE // WIN32 headers omit this
# define O_LARGEFILE 0
#endif
#ifndef O_NONBLOCK
#ifndef O_NONBLOCK // WIN32 headers omit this
# define O_NONBLOCK 0
#endif
#ifndef O_CLOEXEC
#ifndef O_CLOEXEC // WIN32 headers omit this
# define O_CLOEXEC 0
#endif
// clang-format on

View File

@ -24,10 +24,6 @@
#include "verilatedos.h"
#include "verilated_threads.h"
#ifdef VL_PROFILER
#include "verilated_profiler.h"
#endif
#include <cstdio>
#include <memory>
#include <string>
@ -52,44 +48,35 @@ VlMTaskVertex::VlMTaskVertex(uint32_t upstreamDepCount)
// VlWorkerThread
VlWorkerThread::VlWorkerThread(uint32_t threadId, VerilatedContext* contextp,
VlExecutionProfiler* profilerp)
VlExecutionProfiler* profilerp, VlStartWorkerCb startCb)
: m_ready_size{0}
, m_exiting{false}
, m_cthread{startWorker, this, threadId, profilerp}
, m_cthread{startWorker, this, threadId, profilerp, startCb}
, m_contextp{contextp} {}
VlWorkerThread::~VlWorkerThread() {
m_exiting.store(true, std::memory_order_release);
wakeUp();
shutdown();
// The thread should exit; join it.
m_cthread.join();
}
void VlWorkerThread::shutdownTask(void*, bool) {
// Deliberately empty: this function is not meant to do any work. Its address is
// used as a sentinel value - shutdown() enqueues it as a task, and workerLoop()
// exits when the dequeued function pointer equals this function.
}
void VlWorkerThread::workerLoop() {
ExecRec work;
work.m_fnp = nullptr;
while (true) {
if (VL_LIKELY(!work.m_fnp)) dequeWork(&work);
// Do this here, not above, to avoid a race with the destructor.
if (VL_UNLIKELY(m_exiting.load(std::memory_order_acquire))) break;
if (VL_LIKELY(work.m_fnp)) {
work.m_fnp(work.m_selfp, work.m_evenCycle);
work.m_fnp = nullptr;
}
dequeWork(&work);
if (VL_UNLIKELY(work.m_fnp == shutdownTask)) break;
work.m_fnp(work.m_selfp, work.m_evenCycle);
}
}
void VlWorkerThread::startWorker(VlWorkerThread* workerp, uint32_t threadId,
VlExecutionProfiler* profilerp) {
VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) {
Verilated::threadContextp(workerp->m_contextp);
#ifdef VL_PROFILER
// Note: setupThread is not defined without VL_PROFILER, hence the #ifdef. Still, we might
// not be profiling execution (e.g.: PGO only), so profilerp might still be nullptr.
if (profilerp) profilerp->setupThread(threadId);
#endif
if (VL_UNLIKELY(startCb)) startCb(profilerp, threadId);
workerp->workerLoop();
}
@ -97,7 +84,7 @@ void VlWorkerThread::startWorker(VlWorkerThread* workerp, uint32_t threadId,
// VlThreadPool
VlThreadPool::VlThreadPool(VerilatedContext* contextp, int nThreads,
VlExecutionProfiler* profiler) {
VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) {
// --threads N passes nThreads=N-1, as the "main" thread counts as 1
++nThreads;
const unsigned cpus = std::thread::hardware_concurrency();
@ -111,7 +98,7 @@ VlThreadPool::VlThreadPool(VerilatedContext* contextp, int nThreads,
}
// Create worker threads
for (uint32_t threadId = 1; threadId < nThreads; ++threadId) {
m_workers.push_back(new VlWorkerThread{threadId, contextp, profiler});
m_workers.push_back(new VlWorkerThread{threadId, contextp, profilerp, startCb});
}
}

View File

@ -50,6 +50,9 @@
#endif
// clang-format on
class VlExecutionProfiler;
class VlThreadPool;
// VlMTaskVertex and VlThreadPool will work with multiple model class types.
// Since the type is opaque to VlMTaskVertex and VlThreadPool, represent it
// as a void* here.
@ -57,6 +60,9 @@ using VlSelfP = void*;
using VlExecFnp = void (*)(VlSelfP, bool);
// VlWorkerThread::startWorker callback, used to hook in VlExecutionProfiler
using VlStartWorkerCb = void (*)(VlExecutionProfiler*, uint32_t threadId);
// Track dependencies for a single MTask.
class VlMTaskVertex final {
// MEMBERS
@ -129,9 +135,6 @@ public:
}
};
class VlExecutionProfiler;
class VlThreadPool;
class VlWorkerThread final {
private:
// TYPES
@ -162,7 +165,6 @@ private:
// Store the size atomically, so we can spin wait
std::atomic<size_t> m_ready_size;
std::atomic<bool> m_exiting; // Worker thread should exit
std::thread m_cthread; // Underlying C++ thread record
VerilatedContext* const m_contextp; // Context for spawned thread
@ -171,7 +173,7 @@ private:
public:
// CONSTRUCTORS
explicit VlWorkerThread(uint32_t threadId, VerilatedContext* contextp,
VlExecutionProfiler* profilerp);
VlExecutionProfiler* profilerp, VlStartWorkerCb startCb);
~VlWorkerThread();
// METHODS
@ -195,7 +197,6 @@ public:
m_ready.erase(m_ready.begin());
m_ready_size.fetch_sub(1, std::memory_order_relaxed);
}
inline void wakeUp() { addTask(nullptr, nullptr, false); }
inline void addTask(VlExecFnp fnp, VlSelfP selfp, bool evenCycle)
VL_MT_SAFE_EXCLUDES(m_mutex) {
bool notify;
@ -207,9 +208,13 @@ public:
}
if (notify) m_cv.notify_one();
}
inline void shutdown() { addTask(shutdownTask, nullptr, false); }
static void shutdownTask(void*, bool);
void workerLoop();
static void startWorker(VlWorkerThread* workerp, uint32_t threadId,
VlExecutionProfiler* profilerp);
VlExecutionProfiler* profilerp, VlStartWorkerCb startCb);
};
class VlThreadPool final {
@ -221,7 +226,8 @@ public:
// Construct a thread pool with 'nThreads' dedicated threads. The thread
// pool will create these threads and make them available to execute tasks
// via this->workerp(index)->addTask(...)
VlThreadPool(VerilatedContext* contextp, int nThreads, VlExecutionProfiler* profilerp);
VlThreadPool(VerilatedContext* contextp, int nThreads, VlExecutionProfiler* profilerp,
VlStartWorkerCb startCb);
~VlThreadPool();
// METHODS

View File

@ -38,13 +38,13 @@
# include <unistd.h>
#endif
#ifndef O_LARGEFILE // For example on WIN32
#ifndef O_LARGEFILE // WIN32 headers omit this
# define O_LARGEFILE 0
#endif
#ifndef O_NONBLOCK
#ifndef O_NONBLOCK // WIN32 headers omit this
# define O_NONBLOCK 0
#endif
#ifndef O_CLOEXEC
#ifndef O_CLOEXEC // WIN32 headers omit this
# define O_CLOEXEC 0
#endif

View File

@ -79,14 +79,32 @@ class ConstBitOpTreeVisitor final : public VNVisitor {
// bool indicating if the term is clean (0/1 value, or if the top bits might be dirty)
using ResultTerm = std::tuple<AstNode*, unsigned, bool>;
struct LeafInfo final { // Leaf node (either AstConst or AstVarRef)
class LeafInfo final { // Leaf node (either AstConst or AstVarRef)
bool m_polarity = true;
int m_lsb = 0;
int m_wordIdx = -1; // -1 means AstWordSel is not used.
AstVarRef* m_refp = nullptr;
const AstConst* m_constp = nullptr;
int width() const {
public:
void setLeaf(AstVarRef* refp) {
UASSERT(!m_refp && !m_constp, "Must be called just once");
m_refp = refp;
}
void setLeaf(const AstConst* constp) {
UASSERT(!m_refp && !m_constp, "Must be called just once");
m_constp = constp;
}
AstVarRef* refp() const { return m_refp; }
const AstConst* constp() const { return m_constp; }
int wordIdx() const { return m_wordIdx; }
bool polarity() const { return m_polarity; }
int lsb() const { return m_lsb; }
void wordIdx(int i) { m_wordIdx = i; }
void lsb(int l) { m_lsb = l; }
void polarity(bool p) { m_polarity = p; }
int varWidth() const {
UASSERT(m_refp, "m_refp should be set");
const int width = m_refp->varp()->widthMin();
if (!m_refp->isWide()) {
@ -339,25 +357,25 @@ class ConstBitOpTreeVisitor final : public VNVisitor {
UINFO(9, "Increment to " << m_ops << " " << nodep << " called from line " << line << "\n");
}
VarInfo& getVarInfo(const LeafInfo& ref) {
UASSERT_OBJ(ref.m_refp, m_rootp, "null varref in And/Or/Xor optimization");
AstNode* nodep = ref.m_refp->varScopep();
if (!nodep) nodep = ref.m_refp->varp(); // Not scoped
UASSERT_OBJ(ref.refp(), m_rootp, "null varref in And/Or/Xor optimization");
AstNode* nodep = ref.refp()->varScopep();
if (!nodep) nodep = ref.refp()->varp(); // Not scoped
int baseIdx = nodep->user4();
if (baseIdx == 0) { // Not set yet
baseIdx = m_varInfos.size();
const int numWords
= ref.m_refp->dtypep()->isWide() ? ref.m_refp->dtypep()->widthWords() : 1;
= ref.refp()->dtypep()->isWide() ? ref.refp()->dtypep()->widthWords() : 1;
m_varInfos.resize(m_varInfos.size() + numWords);
nodep->user4(baseIdx);
}
const size_t idx = baseIdx + std::max(0, ref.m_wordIdx);
const size_t idx = baseIdx + std::max(0, ref.wordIdx());
VarInfo* varInfop = m_varInfos[idx].get();
if (!varInfop) {
varInfop = new VarInfo{this, ref.m_refp, ref.width()};
varInfop = new VarInfo{this, ref.refp(), ref.varWidth()};
m_varInfos[idx].reset(varInfop);
} else {
if (!varInfop->sameVarAs(ref.m_refp))
CONST_BITOP_SET_FAILED("different var (scope?)", ref.m_refp);
if (!varInfop->sameVarAs(ref.refp()))
CONST_BITOP_SET_FAILED("different var (scope?)", ref.refp());
}
return *varInfop;
}
@ -373,9 +391,9 @@ class ConstBitOpTreeVisitor final : public VNVisitor {
bool ok = !m_failed;
if (expectConst) {
ok &= !info.m_refp && info.m_constp;
ok &= !info.refp() && info.constp();
} else {
ok &= info.m_refp && !info.m_constp;
ok &= info.refp() && !info.constp();
}
return ok ? info : LeafInfo{};
}
@ -411,22 +429,20 @@ class ConstBitOpTreeVisitor final : public VNVisitor {
CONST_BITOP_RETURN_IF(!m_leafp, nodep);
AstConst* const constp = VN_CAST(nodep->bitp(), Const);
CONST_BITOP_RETURN_IF(!constp, nodep->rhsp());
UASSERT_OBJ(m_leafp->m_wordIdx == -1, nodep, "Unexpected nested WordSel");
m_leafp->m_wordIdx = constp->toSInt();
UASSERT_OBJ(m_leafp->wordIdx() == -1, nodep, "Unexpected nested WordSel");
m_leafp->wordIdx(constp->toSInt());
iterate(nodep->fromp());
}
virtual void visit(AstVarRef* nodep) override {
CONST_BITOP_RETURN_IF(!m_leafp, nodep);
UASSERT_OBJ(!m_leafp->m_refp, nodep, m_leafp->m_refp << " is already set");
m_leafp->m_refp = nodep;
m_leafp->m_polarity = m_polarity;
m_leafp->m_lsb = m_lsb;
m_leafp->setLeaf(nodep);
m_leafp->polarity(m_polarity);
m_leafp->lsb(m_lsb);
}
virtual void visit(AstConst* nodep) override {
CONST_BITOP_RETURN_IF(!m_leafp, nodep);
UASSERT_OBJ(!m_leafp->m_constp, nodep, m_leafp->m_constp << " is already set");
m_leafp->m_constp = nodep;
m_leafp->m_lsb = m_lsb;
m_leafp->setLeaf(nodep);
m_leafp->lsb(m_lsb);
}
virtual void visit(AstRedXor* nodep) override {
@ -438,36 +454,36 @@ class ConstBitOpTreeVisitor final : public VNVisitor {
CONST_BITOP_RETURN_IF(!andp, lhsp);
const LeafInfo& mask = findLeaf(andp->lhsp(), true);
CONST_BITOP_RETURN_IF(!mask.m_constp || mask.m_lsb != 0, andp->lhsp());
CONST_BITOP_RETURN_IF(!mask.constp() || mask.lsb() != 0, andp->lhsp());
const LeafInfo& ref = findLeaf(andp->rhsp(), false);
CONST_BITOP_RETURN_IF(!ref.m_refp, andp->rhsp());
CONST_BITOP_RETURN_IF(!ref.refp(), andp->rhsp());
restorer.disableRestore(); // Now all subtree succeeded
const V3Number& maskNum = mask.m_constp->num();
const V3Number& maskNum = mask.constp()->num();
incrOps(nodep, __LINE__);
incrOps(andp, __LINE__);
// Mark all bits checked in this reduction
const int maxBitIdx = std::min(ref.m_lsb + maskNum.width(), ref.width());
for (int bitIdx = ref.m_lsb; bitIdx < maxBitIdx; ++bitIdx) {
const int maskIdx = bitIdx - ref.m_lsb;
const int maxBitIdx = std::min(ref.lsb() + maskNum.width(), ref.varWidth());
for (int bitIdx = ref.lsb(); bitIdx < maxBitIdx; ++bitIdx) {
const int maskIdx = bitIdx - ref.lsb();
if (maskNum.bitIs0(maskIdx)) continue;
// Set true, m_polarity takes care of the entire parity
m_bitPolarities.emplace_back(ref, true, bitIdx);
}
} else { // '^leaf'
const LeafInfo& ref = findLeaf(lhsp, false);
CONST_BITOP_RETURN_IF(!ref.m_refp, lhsp);
CONST_BITOP_RETURN_IF(!ref.refp(), lhsp);
restorer.disableRestore(); // Now all checks passed
incrOps(nodep, __LINE__);
// Mark all bits checked by this comparison
for (int bitIdx = ref.m_lsb; bitIdx < ref.width(); ++bitIdx) {
for (int bitIdx = ref.lsb(); bitIdx < ref.varWidth(); ++bitIdx) {
m_bitPolarities.emplace_back(ref, true, bitIdx);
}
}
@ -492,7 +508,7 @@ class ConstBitOpTreeVisitor final : public VNVisitor {
AstNode* opp = right ? nodep->rhsp() : nodep->lhsp();
const bool origFailed = m_failed;
iterate(opp);
if (leafInfo.m_constp || m_failed) {
if (leafInfo.constp() || m_failed) {
// Revert changes in leaf
restorer.restoreNow();
// Reach past a cast then add to frozen nodes to be added to final reduction
@ -502,14 +518,14 @@ class ConstBitOpTreeVisitor final : public VNVisitor {
continue;
}
restorer.disableRestore(); // Now all checks passed
if (leafInfo.m_refp) {
if (leafInfo.refp()) {
// The conditional on the lsb being in range is necessary for some degenerate
// case, e.g.: (IData)((QData)wide[0] >> 32), or <1-bit-var> >> 1, which is
// just zero
if (leafInfo.m_lsb < leafInfo.width()) {
m_bitPolarities.emplace_back(leafInfo, isXorTree() || leafInfo.m_polarity,
leafInfo.m_lsb);
} else if (isAndTree() && leafInfo.m_polarity) {
if (leafInfo.lsb() < leafInfo.varWidth()) {
m_bitPolarities.emplace_back(leafInfo, isXorTree() || leafInfo.polarity(),
leafInfo.lsb());
} else if (isAndTree() && leafInfo.polarity()) {
// If there is a constant 0 term in an And tree, we must include it. Fudge
// this by adding a bit with both polarities, which will simplify to zero
m_bitPolarities.emplace_back(leafInfo, true, 0);
@ -530,38 +546,38 @@ class ConstBitOpTreeVisitor final : public VNVisitor {
if (const AstAnd* const andp = VN_CAST(nodep->rhsp(), And)) { // comp == (mask & v)
const LeafInfo& mask = findLeaf(andp->lhsp(), true);
CONST_BITOP_RETURN_IF(!mask.m_constp || mask.m_lsb != 0, andp->lhsp());
CONST_BITOP_RETURN_IF(!mask.constp() || mask.lsb() != 0, andp->lhsp());
const LeafInfo& ref = findLeaf(andp->rhsp(), false);
CONST_BITOP_RETURN_IF(!ref.m_refp, andp->rhsp());
CONST_BITOP_RETURN_IF(!ref.refp(), andp->rhsp());
restorer.disableRestore(); // Now all checks passed
const V3Number& maskNum = mask.m_constp->num();
const V3Number& maskNum = mask.constp()->num();
incrOps(nodep, __LINE__);
incrOps(andp, __LINE__);
// Mark all bits checked by this comparison
const int maxBitIdx = std::min(ref.m_lsb + maskNum.width(), ref.width());
for (int bitIdx = ref.m_lsb; bitIdx < maxBitIdx; ++bitIdx) {
const int maskIdx = bitIdx - ref.m_lsb;
const int maxBitIdx = std::min(ref.lsb() + maskNum.width(), ref.varWidth());
for (int bitIdx = ref.lsb(); bitIdx < maxBitIdx; ++bitIdx) {
const int maskIdx = bitIdx - ref.lsb();
if (maskNum.bitIs0(maskIdx)) continue;
const bool polarity = compNum.bitIs1(maskIdx) != maskFlip;
m_bitPolarities.emplace_back(ref, polarity, bitIdx);
}
} else { // comp == v
const LeafInfo& ref = findLeaf(nodep->rhsp(), false);
CONST_BITOP_RETURN_IF(!ref.m_refp, nodep->rhsp());
CONST_BITOP_RETURN_IF(!ref.refp(), nodep->rhsp());
restorer.disableRestore(); // Now all checks passed
incrOps(nodep, __LINE__);
// Mark all bits checked by this comparison
const int maxBitIdx = std::min(ref.m_lsb + compNum.width(), ref.width());
for (int bitIdx = ref.m_lsb; bitIdx < maxBitIdx; ++bitIdx) {
const int maskIdx = bitIdx - ref.m_lsb;
const int maxBitIdx = std::min(ref.lsb() + compNum.width(), ref.varWidth());
for (int bitIdx = ref.lsb(); bitIdx < maxBitIdx; ++bitIdx) {
const int maskIdx = bitIdx - ref.lsb();
const bool polarity = compNum.bitIs1(maskIdx) != maskFlip;
m_bitPolarities.emplace_back(ref, polarity, bitIdx);
}

View File

@ -694,8 +694,8 @@ void EmitCSyms::emitSymImp() {
puts("}\n\n");
// Constructor
puts(symClassName() + "::" + symClassName() + "(VerilatedContext* contextp, const char* namep,"
+ topClassName() + "* modelp)\n");
puts(symClassName() + "::" + symClassName()
+ "(VerilatedContext* contextp, const char* namep, " + topClassName() + "* modelp)\n");
puts(" : VerilatedSyms{contextp}\n");
puts(" // Setup internal state of the Syms class\n");
puts(" , __Vm_modelp{modelp}\n");
@ -724,7 +724,10 @@ void EmitCSyms::emitSymImp() {
// duration of the eval call.
puts(" , __Vm_threadPoolp{new VlThreadPool{_vm_contextp__, "
+ cvtToStr(v3Global.opt.threads() - 1) + ", "
+ (v3Global.opt.profExec() ? "&__Vm_executionProfiler" : "nullptr") + "}}\n");
+ (v3Global.opt.profExec()
? "&__Vm_executionProfiler, &VlExecutionProfiler::startWorkerSetup"
: "nullptr, nullptr")
+ "}}\n");
}
puts(" // Setup module instances\n");
@ -965,7 +968,8 @@ void EmitCSyms::emitSymImp() {
}
closeSplit();
VL_DO_CLEAR(delete m_ofp, m_ofp = nullptr);
m_ofp = nullptr;
VL_DO_CLEAR(delete m_ofpBase, m_ofpBase = nullptr);
}
//######################################################################

View File

@ -197,7 +197,6 @@ public:
of.puts("# User CFLAGS (from -CFLAGS on Verilator command line)\n");
of.puts("VM_USER_CFLAGS = \\\n");
if (!v3Global.opt.libCreate().empty()) of.puts("\t-fPIC \\\n");
if (v3Global.opt.usesProfiler()) of.puts("\t-DVL_PROFILER \\\n");
const V3StringList& cFlags = v3Global.opt.cFlags();
for (const string& i : cFlags) of.puts("\t" + i + " \\\n");
of.puts("\n");

View File

@ -920,13 +920,16 @@ void V3OutFormatter::printf(const char* fmt...) {
// V3OutFormatter: A class for printing to a file, with automatic indentation of C++ code.
// Construct an output file: the V3OutFormatter base is initialized with the filename and
// language, then m_fp is set from V3File::new_fopen_w(filename); a null result is reported
// through v3fatal ("Cannot write <filename>").
// NOTE(review): two initializer-list openings follow (one without and one with m_bufferp);
// this looks like both sides of a diff shown together - confirm which one is live.
V3OutFile::V3OutFile(const string& filename, V3OutFormatter::Language lang)
: V3OutFormatter{filename, lang} {
: V3OutFormatter{filename, lang}
, m_bufferp{new std::array<char, WRITE_BUFFER_SIZE_BYTES>{}} {
if ((m_fp = V3File::new_fopen_w(filename)) == nullptr) {
v3fatal("Cannot write " << filename);
}
}
// Flush any buffered output and close the underlying file.
V3OutFile::~V3OutFile() {
    // Without an open file there is nothing to flush or close; checking first also keeps
    // writeBlock() from handing a null stream to fwrite() when bytes are still pending
    if (!m_fp) return;
    writeBlock();
    fclose(m_fp);
    m_fp = nullptr;
}

View File

@ -22,6 +22,7 @@
#include "V3Error.h"
#include <array>
#include <stack>
#include <set>
#include <list>
@ -183,18 +184,56 @@ public:
// V3OutFile: A class for printing to a file, with automatic indentation of C++ code.
class V3OutFile VL_NOT_FINAL : public V3OutFormatter {
// Size of m_bufferp.
// 128 KiB was determined experimentally to be within the range of buffer sizes that work best.
// It is also the smallest I/O buffer size that GNU coreutils (io_blksize) considers sufficient
// to minimize syscall overhead.
// The practical bounds are the CPU L2/L3 cache size at the upper end and the filesystem block
// size at the lower end.
static constexpr std::size_t WRITE_BUFFER_SIZE_BYTES = 128 * 1024;
// MEMBERS
std::unique_ptr<std::array<char, WRITE_BUFFER_SIZE_BYTES>> m_bufferp; // Write buffer
std::size_t m_usedBytes = 0; // Number of bytes stored in m_bufferp
FILE* m_fp = nullptr;
public:
V3OutFile(const string& filename, V3OutFormatter::Language lang);
V3OutFile(const V3OutFile&) = delete;
V3OutFile& operator=(const V3OutFile&) = delete;
V3OutFile(V3OutFile&&) = delete;
V3OutFile& operator=(V3OutFile&&) = delete;
virtual ~V3OutFile() override;
void putsForceIncs();
private:
// Hand the pending bytes of m_bufferp to m_fp in a single fwrite() call, then mark the
// buffer as empty. The fwrite() result is not inspected.
void writeBlock() {
    const bool hasPending = m_usedBytes > 0;
    if (VL_LIKELY(hasPending)) {
        fwrite(m_bufferp->data(), m_usedBytes, 1, m_fp);
    }
    m_usedBytes = 0;
}
// CALLBACKS
// Writes the character straight to m_fp with fputc().
// NOTE(review): putcOutput is defined again a few lines below using m_bufferp; this copy looks
// like the pre-change side of a diff - confirm which definition is live.
virtual void putcOutput(char chr) override { fputc(chr, m_fp); }
// Writes the NUL-terminated string straight to m_fp with fputs().
// NOTE(review): putsOutput is defined again a few lines below using m_bufferp; this copy looks
// like the pre-change side of a diff - confirm which definition is live.
virtual void putsOutput(const char* str) override { fputs(str, m_fp); }
// Append one character to the write buffer and flush as soon as the buffer is full.
virtual void putcOutput(char chr) override {
    // Claim the next slot first, then store into it; at() bounds-checks the index
    const std::size_t slot = m_usedBytes++;
    m_bufferp->at(slot) = chr;
    const bool bufferFull = m_usedBytes >= WRITE_BUFFER_SIZE_BYTES;
    if (VL_UNLIKELY(bufferFull)) writeBlock();
}
// Append a NUL-terminated string to the write buffer. Whenever the rest of the string
// would fill the buffer, the buffer is topped up and flushed; the final partial piece
// (if any) stays buffered.
virtual void putsOutput(const char* str) override {
    const char* srcp = str;
    std::size_t remaining = strlen(srcp);
    // Room left in the buffer before it has to be flushed
    std::size_t room = WRITE_BUFFER_SIZE_BYTES - m_usedBytes;
    // After each flush the whole buffer is available again
    for (; VL_UNLIKELY(remaining >= room); room = WRITE_BUFFER_SIZE_BYTES) {
        memcpy(m_bufferp->data() + m_usedBytes, srcp, room);
        m_usedBytes = WRITE_BUFFER_SIZE_BYTES;
        writeBlock();  // Resets m_usedBytes to zero
        srcp += room;
        remaining -= room;
    }
    if (remaining == 0) return;
    memcpy(m_bufferp->data() + m_usedBytes, srcp, remaining);
    m_usedBytes += remaining;
}
};
class V3OutCFile VL_NOT_FINAL : public V3OutFile {

View File

@ -150,7 +150,7 @@ def clean_output(filename, outname, is_output, is_c):
lines = out
out = []
with open(outname, "w") as fh:
with open(outname, "w", encoding="utf-8") as fh:
for line in lines:
# Fix filename refs
line = re.sub(basename, newbase, line)

View File

@ -1110,11 +1110,6 @@ sub compile {
return 1;
}
if ($self->{vltmt} && !$self->cfg_with_threaded) {
$self->skip("Test requires Verilator configured with threads\n");
return 1;
}
if ($param{verilator_make_cmake} && !$self->have_cmake) {
$self->skip("Test requires CMake; ignore error since not available or version too old\n");
return 1;
@ -2340,10 +2335,6 @@ sub cxx_version {
return $_Cxx_Version;
}
sub cfg_with_threaded {
    # Always true: C++11 is now always required
    my $threaded = 1;
    return $threaded;
}
our $_Cfg_with_ccache;
sub cfg_with_ccache {

View File

@ -17,10 +17,8 @@ compile(
verilator_flags2 => ["--cc",
"--coverage-toggle --coverage-line --coverage-user",
"--trace --vpi ",
($Self->cfg_with_threaded
? "--threads 2 $root/include/verilated_threads.cpp" : ""),
($Self->cfg_with_threaded
? "--trace-threads 1" : ""),
"--threads 2",
"--trace-threads 1",
"--prof-exec", "--prof-pgo",
"$root/include/verilated_save.cpp"],
);
@ -58,7 +56,7 @@ foreach my $file (sort keys %hit) {
&& $file !~ /_sc/
&& $file !~ /_fst/
&& $file !~ /_heavy/
&& ($file !~ /_thread/ || $Self->cfg_with_threaded)) {
&& ($file !~ /_thread/)) {
error("Include file not covered by t_verilated_all test: ", $file);
}
}