From f00ff61559be0c6a5cbd07f25e264ce3e8652145 Mon Sep 17 00:00:00 2001
From: Geza Lore
Date: Fri, 31 Jan 2020 19:13:55 -0500
Subject: [PATCH] Link Verilator binary partially statically, Closes #2146.

The build is now by default configured to link performance critical
libraries (libgcc, libstdc++, libtcmalloc) statically. This improves
Verilation speed by between 4.5-7% based on my measurements as it
eliminates approx 20% of the mispredicted branches from the execution.

With partial static linking, the size of the .text section in
verilator_bin is increased by about 14%, and the binary is itself only
about 800KB bigger on disk, so hopefully this is not a big issue in
exchange for the faster compilation speed.

A configure option "--disable-partial-static" is provided to restore
the old behaviour of linking everything dynamically.

Note: This patch also changes to use libtcmalloc_minimal, which is all
we really need and itself has fewer dependencies.

Note: configure now also adds -lpthread to CFG_LIBS when it is available
(tcmalloc requires it), src/Makefile_obj.in no longer adds an explicit
-lstdc++ to LIBS, and AC_SUBST(CFG_CXXFLAGS_SRC) is now performed
unconditionally instead of only when tcmalloc is found.
---
 Changes             |  2 ++
 configure.ac        | 44 +++++++++++++++++++++++++++++++++++++++-----
 src/Makefile_obj.in |  5 +++--
 3 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/Changes b/Changes
index b62f845a3..4a99d0189 100644
--- a/Changes
+++ b/Changes
@@ -9,6 +9,8 @@ The contributors that suggested a given feature are shown in []. Thanks!
 
 ** Add -match to lint_off to waive warnings. [Philipp Wagner]
 
+*** Link Verilator binary partially statically, #2146. [Geza Lore]
+
 *** Verilation speed improvements, #2133, #2138. [Geza Lore]
 
 *** Support libgoogle-perftools-dev's libtcmalloc if available, #2137. [Geza Lore]
diff --git a/configure.ac b/configure.ac
index e8f7bf961..51eec8f82 100644
--- a/configure.ac
+++ b/configure.ac
@@ -25,10 +25,27 @@ AC_ARG_ENABLE([maintainer-mode],
 AC_ARG_ENABLE([silent-rules],
               [AS_HELP_STRING([--disable-silent-rules], [ignored])])
 
+# Flag to enable linking specific libraries statically
+AC_MSG_CHECKING(whether to perform partial static linking of Verilator binary)
+AC_ARG_ENABLE([partial-static],
+              [AS_HELP_STRING([--disable-partial-static],
+                              [By default, for Verilation performance, Verilator
+                               is linked against some of its dependencies
+                               statically. Use this to link the Verilator binary
+                               fully dynamically.])],
+              [case "${enableval}" in
+                yes) CFG_ENABLE_PARTIAL_STATIC=yes ;;
+                no) CFG_ENABLE_PARTIAL_STATIC=no ;;
+                *) AC_MSG_ERROR([bad value '${enableval}' for --disable-partial-static]) ;;
+               esac],
+               CFG_ENABLE_PARTIAL_STATIC=yes)
+AC_MSG_RESULT($CFG_ENABLE_PARTIAL_STATIC)
+
 # Flag to enable linking Verilator with tcmalloc if available
+AC_MSG_CHECKING(whether to use tcmalloc)
 AC_ARG_ENABLE([tcmalloc],
               [AS_HELP_STRING([--enable-tcmalloc],
-                              [Use libtcmalloc for faster dynamic memory
+                              [Use libtcmalloc_minimal for faster dynamic memory
                                management in Verilator binary@<:@default=check@:>@])],
               [case "${enableval}" in
                 yes) CFG_WITH_TCMALLOC=yes ;;
@@ -36,6 +53,7 @@ AC_ARG_ENABLE([tcmalloc],
                 *) AC_MSG_ERROR([bad value '${enableval}' for --enable-tcmalloc]) ;;
                esac],
               [CFG_WITH_TCMALLOC=check;])
+AC_MSG_RESULT($CFG_WITH_TCMALLOC)
 
 # Special Substitutions - CFG_WITH_DEFENV
 AC_MSG_CHECKING(whether to use hardcoded paths)
@@ -285,6 +303,7 @@ _MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-faligned-new)
 _MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-Wno-unused-parameter)
 _MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-Wno-undefined-bool-conversion)
 _MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-Wno-shadow)
+AC_SUBST(CFG_CXXFLAGS_SRC)
 
 # Flags for compiling Verilator parser always (in addition to above CFG_CXXFLAGS_SRC)
 _MY_CXX_CHECK_OPT(CFG_CXXFLAGS_PARSER,-Wno-char-subscripts)
@@ -336,21 +355,36 @@ m4_foreach([ldflag], [
 AC_SUBST(CFG_LDLIBS_THREADS)
 AC_SUBST(CFG_LDFLAGS_THREADS_CMAKE)
 
+# When linking partially statically
+if test "$CFG_ENABLE_PARTIAL_STATIC" = "yes"; then
+   _MY_LDLIBS_CHECK_OPT(CFG_LDFLAGS_SRC, -static-libgcc)
+   _MY_LDLIBS_CHECK_OPT(CFG_LDFLAGS_SRC, -static-libstdc++)
+   _MY_LDLIBS_CHECK_OPT(CFG_LDFLAGS_SRC, -Xlinker -gc-sections)
+   LTCMALLOC=-l:libtcmalloc_minimal.a
+else
+   LTCMALLOC=-ltcmalloc_minimal
+fi
+AC_SUBST(CFG_LDFLAGS_SRC)
+
+# The pthread library is required by tcmalloc, so add it if it exists. If it
+# does not, the tcmalloc check below will fail anyway, and linking against
+# pthreads is harmless otherwise.
+_MY_LDLIBS_CHECK_OPT(CFG_LIBS, -lpthread)
+
 # Check if tcmalloc is available based on --enable-tcmalloc
 _MY_LDLIBS_CHECK_IFELSE(
-  -ltcmalloc,
+  $LTCMALLOC,
   [if test "$CFG_WITH_TCMALLOC" != "no"; then
-     CFG_LIBS="$CFG_LIBS -ltcmalloc";
+     CFG_LIBS="$LTCMALLOC $CFG_LIBS";
      # If using tcmalloc, add some extra options to make the compiler not assume
      # it is using it's own versions of the standard library functions
      _MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-fno-builtin-malloc)
      _MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-fno-builtin-calloc)
      _MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-fno-builtin-realloc)
      _MY_CXX_CHECK_OPT(CFG_CXXFLAGS_SRC,-fno-builtin-free)
-     AC_SUBST(CFG_CXXFLAGS_SRC)
    fi],
   [if test "$CFG_WITH_TCMALLOC" = "yes"; then
-     AC_MSG_ERROR([--enable-tcmalloc was given but test for -ltcmalloc failed])
+     AC_MSG_ERROR([--enable-tcmalloc was given but test for ${LTCMALLOC} failed])
    fi])
 AC_SUBST(CFG_LIBS)
 
diff --git a/src/Makefile_obj.in b/src/Makefile_obj.in
index 2225b5b00..e9274cb1b 100644
--- a/src/Makefile_obj.in
+++ b/src/Makefile_obj.in
@@ -68,6 +68,7 @@ CFG_CXXFLAGS_SRC = @CFG_CXXFLAGS_SRC@
 CFG_CXXFLAGS_PARSER = @CFG_CXXFLAGS_PARSER@
 # Compiler flags that turn on extra warnings
 CFG_CXXFLAGS_WEXTRA = @CFG_CXXFLAGS_WEXTRA@
+CFG_LDFLAGS_SRC = @CFG_LDFLAGS_SRC@
 CFG_LIBS = @CFG_LIBS@
 
 #### End of system configuration section. ####
@@ -92,8 +93,7 @@ endif
 #CCMALLOC = /usr/local/lib/ccmalloc-gcc.o -lccmalloc -ldl
 
 # -lfl not needed as Flex invoked with %nowrap option
-# -lstdc++ needed for clang, believed harmless with gcc
-LIBS = $(CFG_LIBS) -lm -lstdc++
+LIBS = $(CFG_LIBS) -lm
 
 CPPFLAGS += -MMD
 CPPFLAGS += -I. -I$(bldsrc) -I$(srcdir) -I$(incdir) -I../../include
@@ -107,6 +107,7 @@ CPPFLAGS += -W -Wall $(CFG_CXXFLAGS_WEXTRA) $(CFG_CXXFLAGS_SRC) -Werror
 else
 CPPFLAGS += $(CFG_CXXFLAGS_SRC)
 endif
+LDFLAGS += $(CFG_LDFLAGS_SRC)
 
 CPPFLAGSWALL = $(CPPFLAGS)
 CPPFLAGSPARSER = $(CPPFLAGS) $(CFG_CXXFLAGS_PARSER)