Support libgoogle-perftools-dev's libtcmalloc if available. #2137.

As Verilator continuously allocates and releases small objects (e.g.:
AstNode, V3GraphVertex, V3GraphEdge), it spends a significant amount of
time in malloc/free and friends. This patch adds the --enable-tcmalloc
configure option to link Verilator against the high performance malloc
implementation library libtcmalloc. The default is to use libtcmalloc if
available on the system. Note that there are no source code changes; we
are simply replacing the standard library memory allocation functions.

Measured a major compilation speed improvement of 27% when running
Verilator with -O3 on a large design.
This commit is contained in:
Geza Lore 2020-01-23 17:32:19 -05:00 committed by Wilson Snyder
parent 5d037c3c8c
commit 7ab2bdb6bb
5 changed files with 55 additions and 7 deletions

View File

@ -27,6 +27,7 @@ before_install:
# Perl modules needed for testing # Perl modules needed for testing
- yes yes | sudo cpan -fi Unix::Processors Parallel::Forker Bit::Vector - yes yes | sudo cpan -fi Unix::Processors Parallel::Forker Bit::Vector
- sudo apt-get install gdb gtkwave - sudo apt-get install gdb gtkwave
- sudo apt-get install libgoogle-perftools-dev
before_script: before_script:
- bash -x ci/build_vcddiff.sh - bash -x ci/build_vcddiff.sh
- bash -x ci/build_verilator.sh - bash -x ci/build_verilator.sh

View File

@ -9,6 +9,8 @@ The contributors that suggested a given feature are shown in []. Thanks!
** Add -match to lint_off to waive warnings. [Philipp Wagner] ** Add -match to lint_off to waive warnings. [Philipp Wagner]
*** Support libgoogle-perftools-dev's libtcmalloc if available. #2137. [Geza Lore]
*** Support $readmem/$writemem with assoc arrays. Closes #2100. [agrobman] *** Support $readmem/$writemem with assoc arrays. Closes #2100. [agrobman]
**** Support left justified $display. Closes #2101. [Pieter Kapsenberg] **** Support left justified $display. Closes #2101. [Pieter Kapsenberg]

View File

@ -25,6 +25,18 @@ AC_ARG_ENABLE([maintainer-mode],
AC_ARG_ENABLE([silent-rules], AC_ARG_ENABLE([silent-rules],
[AS_HELP_STRING([--disable-silent-rules], [ignored])]) [AS_HELP_STRING([--disable-silent-rules], [ignored])])
# Flag to enable linking Verilator with tcmalloc if available
# An explicit yes or no is stored in CFG_WITH_TCMALLOC as given and any other
# value aborts configure with an error. When the option is not given at all
# CFG_WITH_TCMALLOC is set to check.
AC_ARG_ENABLE([tcmalloc],
[AS_HELP_STRING([--enable-tcmalloc],
[Use libtcmalloc for faster dynamic memory
management @<:@default=check@:>@])],
[case "${enableval}" in
yes) CFG_WITH_TCMALLOC=yes ;;
no) CFG_WITH_TCMALLOC=no ;;
*) AC_MSG_ERROR([bad value '${enableval}' for --enable-tcmalloc]) ;;
esac],
[CFG_WITH_TCMALLOC=check;])
# Special Substitutions - CFG_WITH_DEFENV # Special Substitutions - CFG_WITH_DEFENV
AC_MSG_CHECKING(whether to use hardcoded paths) AC_MSG_CHECKING(whether to use hardcoded paths)
AC_ARG_ENABLE([defenv], AC_ARG_ENABLE([defenv],
@ -209,15 +221,26 @@ AC_DEFUN([_MY_LDLIBS_CHECK_FLAG],
LIBS="$ACO_SAVE_LIBS" LIBS="$ACO_SAVE_LIBS"
]) ])
AC_DEFUN([_MY_LDLIBS_CHECK_OPT], AC_DEFUN([_MY_LDLIBS_CHECK_IFELSE],
[# _MY_LDLIBS_CHECK_OPT(flag) -- Check if linker supports specific options [# _MY_LDLIBS_CHECK_IFELSE(flag,action-if-supported,action-if-not-supported)
# If it does, append flag to variable # Check if linker supports specific flag, if it does do action-if-supported
_MY_LDLIBS_CHECK_FLAG($2) # otherwise do action-if-not-supported
_MY_LDLIBS_CHECK_FLAG($1)
if test "$_my_result" = "yes" ; then if test "$_my_result" = "yes" ; then
$1="$$1 $2" true
$2
else
true
$3
fi fi
]) ])
AC_DEFUN([_MY_LDLIBS_CHECK_OPT],
[# _MY_LDLIBS_CHECK_OPT(variable, flag) -- Check if linker supports specific
# options. If it does, append flag to variable.
# Implemented on top of _MY_LDLIBS_CHECK_IFELSE with only the
# action-if-supported argument given, so nothing is appended when the
# linker check fails.
_MY_LDLIBS_CHECK_IFELSE($2, $1="$$1 $2")
])
# Flag to select newest language standard supported # Flag to select newest language standard supported
# Macros work such that first option that passes is the one we take # Macros work such that first option that passes is the one we take
# gnu++17 code is clean, but SystemC in 2018 doesn't link with it (bug1339) # gnu++17 code is clean, but SystemC in 2018 doesn't link with it (bug1339)
@ -258,7 +281,6 @@ _MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-faligned-new)
_MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-Wno-unused-parameter) _MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-Wno-unused-parameter)
_MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-Wno-undefined-bool-conversion) _MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-Wno-undefined-bool-conversion)
_MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-Wno-shadow) _MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-Wno-shadow)
AC_SUBST(CFG_CXXFLAGS_SRC)
# Flags for compiling Verilator parser always (in addition to above CFG_CXXFLAGS_SRC) # Flags for compiling Verilator parser always (in addition to above CFG_CXXFLAGS_SRC)
_MY_CXX_CHECK_OPT(CFG_CXXFLAGS_PARSER,-Wno-char-subscripts) _MY_CXX_CHECK_OPT(CFG_CXXFLAGS_PARSER,-Wno-char-subscripts)
@ -310,6 +332,24 @@ m4_foreach([ldflag], [
AC_SUBST(CFG_LDLIBS_THREADS) AC_SUBST(CFG_LDLIBS_THREADS)
AC_SUBST(CFG_LDFLAGS_THREADS_CMAKE) AC_SUBST(CFG_LDFLAGS_THREADS_CMAKE)
# Check if tcmalloc is available based on --enable-tcmalloc
# The link test for -ltcmalloc is always performed. CFG_WITH_TCMALLOC then
# decides what happens with the outcome. Any value other than no uses the
# library when the test passes. A value of yes turns a failed test into a
# configure error.
_MY_LDLIBS_CHECK_IFELSE(
-ltcmalloc,
[if test "$CFG_WITH_TCMALLOC" != "no"; then
CFG_LIBS="$CFG_LIBS -ltcmalloc";
# If using tcmalloc, add some extra options to make the compiler not assume
# it is using its own versions of the standard library functions
_MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-fno-builtin-malloc)
_MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-fno-builtin-calloc)
_MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-fno-builtin-realloc)
_MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-fno-builtin-free)
AC_SUBST(CFG_CXXFLAGS_SRC)
fi],
[if test "$CFG_WITH_TCMALLOC" = "yes"; then
AC_MSG_ERROR([--enable-tcmalloc was given but test for -ltcmalloc failed])
fi])
AC_SUBST(CFG_LIBS)
# Set CFG_WITH_THREADED if can support threading # Set CFG_WITH_THREADED if can support threading
AC_MSG_CHECKING(whether $CXX supports Verilated threads) AC_MSG_CHECKING(whether $CXX supports Verilated threads)
ACO_SAVE_CXXFLAGS="$CXXFLAGS" ACO_SAVE_CXXFLAGS="$CXXFLAGS"

View File

@ -84,6 +84,10 @@ To build Verilator you will need to install some standard packages:
sudo apt-get install autoconf sudo apt-get install autoconf
sudo apt-get install flex bison sudo apt-get install flex bison
The following are optional, but improve compilation speed:
sudo apt-get install libgoogle-perftools-dev
Additionally, to build or run Verilator you need these standard packages: Additionally, to build or run Verilator you need these standard packages:
sudo apt-get install perl python3 sudo apt-get install perl python3

View File

@ -68,6 +68,7 @@ CFG_CXXFLAGS_SRC = @CFG_CXXFLAGS_SRC@
CFG_CXXFLAGS_PARSER = @CFG_CXXFLAGS_PARSER@ CFG_CXXFLAGS_PARSER = @CFG_CXXFLAGS_PARSER@
# Compiler flags that turn on extra warnings # Compiler flags that turn on extra warnings
CFG_CXXFLAGS_WEXTRA = @CFG_CXXFLAGS_WEXTRA@ CFG_CXXFLAGS_WEXTRA = @CFG_CXXFLAGS_WEXTRA@
CFG_LIBS = @CFG_LIBS@
#### End of system configuration section. #### #### End of system configuration section. ####
@ -92,7 +93,7 @@ endif
# -lfl not needed as Flex invoked with %nowrap option # -lfl not needed as Flex invoked with %nowrap option
# -lstdc++ needed for clang, believed harmless with gcc # -lstdc++ needed for clang, believed harmless with gcc
LIBS = -lm -lstdc++ LIBS = $(CFG_LIBS) -lm -lstdc++
CPPFLAGS += -MMD CPPFLAGS += -MMD
CPPFLAGS += -I. -I$(bldsrc) -I$(srcdir) -I$(incdir) -I../../include CPPFLAGS += -I. -I$(bldsrc) -I$(srcdir) -I$(incdir) -I../../include