Add prepareClone and atClone APIs for Verilated models (#3503) (#4444)

This API is used if the user copies the process using `fork`
or a similar OS-level mechanism. The `atClone` member function
ensures that all model-allocated resources are re-allocated, so
that the copied child process/model can simulate correctly.

A typical allocated resource is the thread pool: every model
has its own pool.
This commit is contained in:
Yinan Xu 2023-08-30 19:02:55 +08:00 committed by GitHub
parent ca6ab584d0
commit b4b74d72f0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 232 additions and 1 deletions

View File

@ -128,6 +128,43 @@ in the distribution. These headers use Doxygen comments, `///` and `//<`,
to indicate and document those functions that are part of the Verilated
public API.
Process-Level Clone APIs
--------------------------
Modern operating systems support process-level cloning (a.k.a. copying, forking)
with system call interfaces in C/C++, e.g., :code:`fork()` in Linux.
However, after cloning a parent process, some resources cannot be inherited
in the child process. For example, in POSIX systems, when you fork a process,
the child process inherits all the memory of the parent process. However,
only the thread that called fork is replicated in the child process. Other
threads are not.
Therefore, to support the process-level clone mechanisms, Verilator supports
:code:`prepareClone()` and :code:`atClone()` APIs to allow the user to manually
re-construct the model in the child process. Together, the two APIs release the
necessary resources before cloning and re-initialize them after cloning.
The two APIs are supported in the verilated models. Here is an example of usage
with Linux :code:`fork()` and :code:`pthread_atfork` APIs:
.. code-block:: C++
// static function pointers to fit pthread_atfork
static auto prepareClone = [](){ topp->prepareClone(); };
static auto atClone = [](){ topp->atClone(); };
// in main function, register the handlers:
pthread_atfork(prepareClone, atClone, atClone);
For better flexibility, you can also manually call the handlers before and
after :code:`fork()`.
With the process-level clone APIs, users can create process-level snapshots
for the verilated models. While the Verilator save/restore option provides
persistent and circuit-independent snapshots, the process-level clone APIs
enable in-memory, circuit-transparent, and highly efficient snapshots.
Direct Programming Interface (DPI)
==================================

View File

@ -2608,6 +2608,14 @@ VerilatedVirtualBase* VerilatedContext::threadPoolp() {
return m_threadPool.get();
}
// Destroy the context's thread pool ahead of a process-level clone (e.g. fork).
// Harmless when no pool is currently owned.
void VerilatedContext::prepareClone() { m_threadPool.reset(); }
// Build a fresh thread pool for this context after a process-level clone and return it.
VerilatedVirtualBase* VerilatedContext::threadPoolpOnClone() {
    // Abandon, rather than destroy, any pool still owned at this point: release() only
    // gives up ownership (and does nothing when no pool is owned), so the old pool's
    // destructor never runs here.
    // NOTE(review): presumably intentional because a forked child does not inherit the
    // pool's worker threads, so destroying the stale pool there would be unsafe; the
    // price is a small leak when prepareClone() was not called before cloning - confirm.
    static_cast<void>(m_threadPool.release());
    m_threadPool = std::make_unique<VlThreadPool>(this, m_threads - 1);
    return m_threadPool.get();
}
VerilatedVirtualBase*
VerilatedContext::enableExecutionProfiler(VerilatedVirtualBase* (*construct)(VerilatedContext&)) {
if (!m_executionProfiler) m_executionProfiler.reset(construct(*this));

View File

@ -568,6 +568,8 @@ public:
void addModel(VerilatedModel*);
VerilatedVirtualBase* threadPoolp();
// Delete this context's thread pool; intended to be called before a process-level
// clone (e.g. fork)
void prepareClone();
// Create a new thread pool for this context (dropping ownership of any existing one
// without destroying it) and return it; intended to be called after a process-level clone
VerilatedVirtualBase* threadPoolpOnClone();
VerilatedVirtualBase*
enableExecutionProfiler(VerilatedVirtualBase* (*construct)(VerilatedContext&));

View File

@ -234,6 +234,12 @@ class EmitCModel final : public EmitCFunc {
puts("const char* hierName() const override final;\n");
puts("const char* modelName() const override final;\n");
puts("unsigned threads() const override final;\n");
puts("/// Prepare for cloning the model at the process level (e.g. fork in Linux)\n");
puts("/// Release necessary resources. Called before cloning.\n");
puts("void prepareClone() const;\n");
puts("/// Re-init after cloning the model at the process level (e.g. fork in Linux)\n");
puts("/// Re-allocate necessary resources. Called after cloning.\n");
puts("void atClone() const;\n");
if (v3Global.opt.trace()) {
puts("std::unique_ptr<VerilatedTraceConfig> traceConfig() const override final;\n");
}
@ -479,6 +485,15 @@ class EmitCModel final : public EmitCFunc {
+ "\"; }\n");
puts("unsigned " + topClassName() + "::threads() const { return "
+ cvtToStr(std::max(1, v3Global.opt.threads())) + "; }\n");
puts("void " + topClassName()
+ "::prepareClone() const { contextp()->prepareClone(); }\n");
puts("void " + topClassName() + "::atClone() const {\n");
if (v3Global.opt.threads() > 1) {
puts("vlSymsp->__Vm_threadPoolp = static_cast<VlThreadPool*>(");
}
puts("contextp()->threadPoolpOnClone()");
if (v3Global.opt.threads() > 1) puts(")");
puts(";\n}\n");
if (v3Global.opt.trace()) {
puts("std::unique_ptr<VerilatedTraceConfig> " + topClassName()

View File

@ -468,7 +468,7 @@ void EmitCSyms::emitSymHdr() {
if (v3Global.opt.mtasks()) {
puts("\n// MULTI-THREADING\n");
puts("VlThreadPool* const __Vm_threadPoolp;\n");
puts("VlThreadPool* __Vm_threadPoolp;\n");
puts("bool __Vm_even_cycle__ico = false;\n");
puts("bool __Vm_even_cycle__act = false;\n");
puts("bool __Vm_even_cycle__nba = false;\n");

View File

@ -97,6 +97,8 @@ sub check_cpp {
&& $func !~ /::traceInit$/
&& $func !~ /::traceFull$/
&& $func !~ /::final$/
&& $func !~ /::prepareClone$/
&& $func !~ /::atClone$/
) {
push @funcs, $func;
}

View File

@ -0,0 +1,89 @@
//
// DESCRIPTION: Verilator: Verilog Test module for prepareClone/atClone APIs
//
// This file ONLY is placed into the Public Domain, for any use,
// without warranty, 2023 by Yinan Xu.
// SPDX-License-Identifier: CC0-1.0
#include <verilated.h>
#include <cstdio>
#include <pthread.h>
#include <sys/wait.h>
#include <unistd.h>
// These require the above. Comment prevents clang-format moving them
#include "TestCheck.h"
#include VM_PREFIX_INCLUDE
// Simulation-time hook. This test does not model time, so it always reports zero.
double sc_time_stamp() {
    return 0.0;
}
// Note: pthread_atfork only accepts plain function pointers, so its handlers cannot
// carry a model pointer of their own. For this simple example the TOP model is
// therefore kept in a file-scope variable. Code that does not use pthread_atfork can
// allocate the model dynamically and call prepareClone/atClone by hand before and
// after fork instead.
static VM_PREFIX* topp = nullptr;
// Fork handler: forwards to the model's prepareClone()
static void prepareClone() { topp->prepareClone(); }
// Fork handler: forwards to the model's atClone()
static void atClone() { topp->atClone(); }
// Advance the model by one full clock period: evaluate with the clock high, then
// evaluate again with the clock low.
void single_cycle(VM_PREFIX* topp) {
    for (int level = 1; level >= 0; --level) {
        topp->clock = level;
        topp->eval();
    }
}
// Test driver: resets the model, then simulates until $finish. When the design raises
// do_clone the process forks; the child keeps simulating on its own, and the parent
// resumes (with is_parent set) once the child has exited.
// Returns 0 on success, 1 if the fork handlers cannot be registered or fork() fails.
int main(int argc, char** argv) {
    // We disable the buffering for stdout in this test.
    // Redirecting the stdout to files with buffering causes duplicated stdout
    // outputs in both parent and child processes, even if they are actually
    // called before the fork.
    setvbuf(stdout, nullptr, _IONBF, 0);

    VerilatedContext* contextp = new VerilatedContext;
    topp = new VM_PREFIX{contextp};

    // To avoid resource leaks, prepareClone must be called before fork to
    // free all the allocated resources. Though this would bring performance
    // overhead to the parent process, we believe that fork should not be
    // called frequently, and the overhead is minor compared to simulation.
    // pthread_atfork reports failure through its return value (an error number).
    const int atforkErr = pthread_atfork(prepareClone, atClone, atClone);
    if (atforkErr != 0) {
        fprintf(stderr, "pthread_atfork failed: error %d\n", atforkErr);
        VL_DO_DANGLING(delete topp, topp);
        VL_DO_DANGLING(delete contextp, contextp);
        return 1;
    }
    // If you care about critical performance, prepareClone can be avoided,
    // with atClone being called only at the child process, as follows.
    // It has the same functionality as the previous one, but has memory leaks.
    // According to the sanitizer, 288 bytes are leaked for one fork call.
    // pthread_atfork(nullptr, nullptr, atClone);

    // Hold reset for five cycles before starting the real run
    topp->reset = 1;
    topp->is_parent = 0;
    for (int i = 0; i < 5; i++) single_cycle(topp);
    topp->reset = 0;

    int exitCode = 0;
    while (!contextp->gotFinish()) {
        single_cycle(topp);
        if (!topp->do_clone) continue;

        const pid_t pid = fork();
        if (pid < 0) {
            // Without a child the expected output can never be produced; report the
            // reason on stderr and stop instead of silently simulating on.
            perror("fork failed");
            exitCode = 1;
            break;
        }
        if (pid == 0) {
            printf("child: here we go\n");
        } else {
            // Block until no child processes remain, so that the child's output
            // is complete before the parent continues.
            while (wait(nullptr) > 0) {}
            printf("parent: here we go\n");
            topp->is_parent = 1;
        }
    }

    topp->final();
    VL_DO_DANGLING(delete topp, topp);
    VL_DO_DANGLING(delete contextp, contextp);
    return exitCode;
}

View File

@ -0,0 +1,15 @@
counter = 0
counter = 1
counter = 2
counter = 3
counter = 4
counter = 5
child: here we go
counter = 6
counter = 7
counter = 8
parent: here we go
counter = 6
counter = 7
counter = 8
*-* All Finished *-*

View File

@ -0,0 +1,25 @@
#!/usr/bin/env perl
if (!$::Driver) { use strict; use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
# DESCRIPTION: Verilator: Verilog Test module for prepareClone/atClone APIs
#
# This file ONLY is placed into the Public Domain, for any use,
# without warranty, 2023 by Yinan Xu.
# SPDX-License-Identifier: CC0-1.0
# Scenario selection (presumably: run under every Verilator scenario - confirm against driver)
scenarios(vlt_all => 1);
# Build the model together with this test's own C++ file, <t_dir>/<name>.cpp, passed via --exe.
# make_top_shell/make_main are switched off; presumably this stops the driver from
# generating its own wrappers so the hand-written main() is used - confirm.
compile(
make_top_shell => 0,
make_main => 0,
verilator_flags2 => ["--exe $Self->{t_dir}/$Self->{name}.cpp",
"-cc"],
# Two threads when the driver object's vltmt flag is set, otherwise one
threads => $Self->{vltmt} ? 2 : 1,
);
# Run the built simulation; the golden filename is handed over as the expected output
execute(
check_finished => 1,
expect_filename => $Self->{golden_filename},
);
ok(1);
1;

View File

@ -0,0 +1,38 @@
// DESCRIPTION: Verilator: Verilog Test module for prepareClone/atClone APIs
//
// This model counts from 0 to 8. It forks a child process (in C++) at 6
// and waits for the child to simulate and exit for resumption (of the parent).
//
// This file ONLY is placed into the Public Domain, for any use,
// without warranty, 2023 by Yinan Xu.
// SPDX-License-Identifier: CC0-1.0
module top(
    input clock,
    input reset,
    input is_parent,
    output do_clone
);

    // Free-running 4-bit counter; cleared while reset is asserted
    reg [3:0] counter;

    // Ask the C++ driver to clone the process while the counter holds 6
    assign do_clone = (counter == 4'd6);

    always @(posedge clock) begin
        if (!reset) begin
            // Report the pre-increment value, then schedule the increment
            $write("counter = %d\n", counter);
            counter <= counter + 4'd1;
        end
        else begin
            counter <= 4'h0;
        end

        // Stop once bit 3 of the counter is set; only the process flagged as the
        // parent announces completion
        if (counter[3]) begin
            if (is_parent) $write("*-* All Finished *-*\n");
            $finish(0);
        end
    end

endmodule