diff --git a/Changes b/Changes index 23c6c9157..f8ca13d87 100644 --- a/Changes +++ b/Changes @@ -11,6 +11,8 @@ indicates the contributor was also the author of the fix; Thanks! ** SystemPerl mode is deprecated and now untested. +*** Inline C functions that are used only once, msg1525. [Jie Xu] + * Verilator 3.866 2014-11-15 diff --git a/bin/verilator b/bin/verilator index a40a629f9..f6a8e7ad6 100755 --- a/bin/verilator +++ b/bin/verilator @@ -1399,6 +1399,10 @@ especially if you link in DPI code. To enable LTO on GCC, pass "-flto" in both compilation and link. Note LTO may cause excessive compile times on large designs. +If you are using your own makefiles, you may want to compile the Verilated +code with -DVL_INLINE_OPT=inline. This will inline functions; however, this +requires that all cpp files be compiled in a single compiler run. + You may uncover further tuning possibilities by profiling the Verilog code. Use Verilator's --profile-cfuncs, then GCC's -g -pg. You can then run either oprofile or gprof to see where in the C++ code the time is spent. 
diff --git a/bin/verilator_includer b/bin/verilator_includer index 28d50c8db..59bf66f10 100755 --- a/bin/verilator_includer +++ b/bin/verilator_includer @@ -12,5 +12,9 @@ require 5.005; use warnings; print "// DESCR"."IPTION: Generated by verilator_includer via makefile\n"; foreach my $param (@ARGV) { - print "#include \"$param\"\n" + if ($param =~ /^-D([^=]+)=(.*)/) { + print "#define $1 $2\n" + } else { + print "#include \"$param\"\n" + } } diff --git a/include/verilated.mk.in b/include/verilated.mk.in index 87feb6df8..9412f9cbc 100644 --- a/include/verilated.mk.in +++ b/include/verilated.mk.in @@ -23,7 +23,9 @@ CFG_CXXFLAGS_NO_UNUSED = @CFG_CXXFLAGS_NO_UNUSED@ # Programs SP_PREPROC = sp_preproc -SP_INCLUDER = $(PERL) $(VERILATOR_ROOT)/bin/verilator_includer +SP_INCLUDER = $(VERILATOR_INCLUDER) +VERILATOR_COVERAGE = $(PERL) $(VERILATOR_ROOT)/bin/verilator_coverage +VERILATOR_INCLUDER = $(PERL) $(VERILATOR_ROOT)/bin/verilator_includer ###################################################################### # Make checks @@ -151,15 +153,15 @@ VK_USER_OBJS = $(addsuffix .o, $(VM_USER_CLASSES)) VK_GLOBAL_OBJS = $(addsuffix .o, $(VM_GLOBAL_FAST) $(VM_GLOBAL_SLOW)) -ifneq ($(VM_PARALLEL_BUILDS),1) +ifneq ($(VM_PARALLEL_BUILDS),1) # Fast building, all .cpp's in one fell swoop # This saves about 5 sec per module, but can be slower if only a little changes VK_OBJS += $(VM_PREFIX)__ALLcls.o $(VM_PREFIX)__ALLsup.o all_cpp: $(VM_PREFIX)__ALLcls.cpp $(VM_PREFIX)__ALLsup.cpp $(VM_PREFIX)__ALLcls.cpp: $(VK_CLASSES_CPP) - $(SP_INCLUDER) $^ > $@ + $(VERILATOR_INCLUDER) -DVL_INCLUDE_OPT=include $^ > $@ $(VM_PREFIX)__ALLsup.cpp: $(VK_SUPPORT_CPP) - $(SP_INCLUDER) $^ > $@ + $(VERILATOR_INCLUDER) -DVL_INCLUDE_OPT=include $^ > $@ else #Slow way of building... 
Each .cpp file by itself VK_OBJS += $(addsuffix .o, $(VM_CLASSES) $(VM_SUPPORT)) diff --git a/include/verilatedos.h b/include/verilatedos.h index c2068f101..a82eb5906 100644 --- a/include/verilatedos.h +++ b/include/verilatedos.h @@ -105,6 +105,13 @@ # define VL_UNIQUE_PTR auto_ptr #endif +//========================================================================= +// Optimization + +#ifndef VL_INLINE_OPT +# define VL_INLINE_OPT ///< "inline" if compiling all objects in single compiler run +#endif + //========================================================================= // Warning disabled diff --git a/src/V3AstNodes.h b/src/V3AstNodes.h index 6c1acb271..f41676977 100644 --- a/src/V3AstNodes.h +++ b/src/V3AstNodes.h @@ -4699,6 +4699,7 @@ private: bool m_formCallTree:1; // Make a global function to call entire tree of functions bool m_slow:1; // Slow routine, called once or just at init time bool m_funcPublic:1; // From user public task/function + bool m_isInline:1; // Inline function bool m_isStatic:1; // Function is declared static (no this) bool m_symProlog:1; // Setup symbol table for later instructions bool m_entryPoint:1; // User may call into this top level function @@ -4719,6 +4720,7 @@ public: m_formCallTree = false; m_slow = false; m_funcPublic = false; + m_isInline = false; m_isStatic = true; // Note defaults to static, later we see where thisp is needed m_symProlog = false; m_entryPoint = false; @@ -4748,6 +4750,7 @@ public: string rtnTypeVoid() const { return ((m_rtnType=="") ? 
"void" : m_rtnType); } bool dontCombine() const { return m_dontCombine || funcType()!=AstCFuncType::FT_NORMAL; } void dontCombine(bool flag) { m_dontCombine = flag; } + bool dontInline() const { return !dontCombine() && !slow() && !skipDecl() && !funcPublic(); } bool skipDecl() const { return m_skipDecl; } void skipDecl(bool flag) { m_skipDecl = flag; } bool declPrivate() const { return m_declPrivate; } @@ -4762,6 +4765,8 @@ public: string argTypes() const { return m_argTypes; } void funcType(AstCFuncType flag) { m_funcType = flag; } AstCFuncType funcType() const { return m_funcType; } + bool isInline() const { return m_isInline; } + void isInline(bool flag) { m_isInline = flag; } bool isStatic() const { return m_isStatic; } void isStatic(bool flag) { m_isStatic = flag; } bool symProlog() const { return m_symProlog; } diff --git a/src/V3Branch.cpp b/src/V3Branch.cpp index 699f9b8e2..c887d75c5 100644 --- a/src/V3Branch.cpp +++ b/src/V3Branch.cpp @@ -21,6 +21,9 @@ // At each IF/(IF else). // Count underneath $display/$stop statements. // If more on if than else, this branch is unlikely, or vice-versa. +// At each FTASKREF, +// Count calls into the function +// Then, if FTASK is called only once, add inline attribute // //************************************************************************* @@ -40,9 +43,18 @@ class BranchVisitor : public AstNVisitor { private: + // NODE STATE + // Entire netlist: + // AstFTask::user1() -> int. 
Number of references + AstUser1InUse m_inuser1; + + // TYPES + typedef vector<AstCFunc*> CFuncVec; + // STATE int m_likely; // Excuses for branch likely taken int m_unlikely; // Excuses for branch likely not taken + CFuncVec m_cfuncsp; // List of all tasks // METHODS static int debug() { @@ -55,6 +67,12 @@ private: m_likely = false; m_unlikely = false; } + void checkUnlikely(AstNode* nodep) { + if (nodep->isUnlikely()) { + UINFO(4," UNLIKELY: "<isUnlikely()) { - UINFO(4," UNLIKELY: "<funcp()->user1Inc(); nodep->iterateChildren(*this); } + virtual void visit(AstCFunc* nodep, AstNUser*) { + checkUnlikely(nodep); + m_cfuncsp.push_back(nodep); + nodep->iterateChildren(*this); + } + virtual void visit(AstNode* nodep, AstNUser*) { + checkUnlikely(nodep); + nodep->iterateChildren(*this); + } + + // METHODS + void calc_tasks() { + for (CFuncVec::iterator it=m_cfuncsp.begin(); it!=m_cfuncsp.end(); ++it) { + AstCFunc* nodep = *it; + if (!nodep->dontInline()) { + nodep->isInline(true); + } + } + } public: // CONSTUCTORS BranchVisitor(AstNetlist* nodep) { reset(); nodep->iterateChildren(*this); + calc_tasks(); } virtual ~BranchVisitor() {} }; diff --git a/src/V3EmitC.cpp b/src/V3EmitC.cpp index 53e359704..fe26a87b6 100644 --- a/src/V3EmitC.cpp +++ b/src/V3EmitC.cpp @@ -829,6 +829,7 @@ class EmitCImp : EmitCStmts { splitSizeInc(nodep); puts("\n"); + if (nodep->isInline()) puts("VL_INLINE_OPT "); puts(nodep->rtnTypeVoid()); puts(" "); puts(modClassName(m_modp)+"::"+nodep->name() +"("+cFuncArgs(nodep)+") {\n"); diff --git a/test_regress/Makefile_obj b/test_regress/Makefile_obj index 0a487ebd5..2b68424d1 100644 --- a/test_regress/Makefile_obj +++ b/test_regress/Makefile_obj @@ -52,7 +52,7 @@ endif #Our own compile rules; Faster compile, all in one file $(VM_PREFIX)__ALLboth.cpp: $(VK_CLASSES_CPP) $(VK_SUPPORT_CPP) - $(SP_INCLUDER) $^ > $@ + $(VERILATOR_INCLUDER) -DVL_INLINE_OPT=inline $^ > $@ $(VM_PREFIX)__ALLboth.o: $(VM_PREFIX)__ALLboth.cpp $(OBJCACHE) $(CXX) $(CXXFLAGS) $(CPPFLAGS) 
$(OPT_FAST) -c -o $@ $<