From 215bdfccc25b60870011b39b7ebfac0a22d5d2d0 Mon Sep 17 00:00:00 2001
From: Wilson Snyder
Date: Thu, 24 Apr 2008 14:26:01 +0000
Subject: [PATCH] Use GCC 4 compiler builtins for XORs

git-svn-id: file://localhost/svn/verilator/trunk/verilator@1033 77ca24e4-aefa-0310-84f0-b9a241c72d87
---
 include/verilated.h | 37 +++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/include/verilated.h b/include/verilated.h
index a6f999f7c..cce164388 100644
--- a/include/verilated.h
+++ b/include/verilated.h
@@ -460,39 +460,55 @@ static inline IData VL_REDOR_W(int words, WDataInP lwp) {
 }
 
 // EMIT_RULE: VL_REDXOR: oclean=dirty; obits=1;
-static inline IData VL_REDXOR_W(int words, WDataInP lwp) {
-    IData r = lwp[0];
-    for (int i=1; i < words; i++) r ^= lwp[i];
-    r=(r^(r>>1));
-    r=(r^(r>>2));
-    r=(r^(r>>4));
-    r=(r^(r>>8));
-    r=(r^(r>>16));
-    return r;
-}
 static inline IData VL_REDXOR_2(IData r) {
+    // Experiments show VL_REDXOR_2 is faster than __builtin_parityl
     r=(r^(r>>1));
     return r;
 }
 static inline IData VL_REDXOR_4(IData r) {
+#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS)
+    return __builtin_parityl(r);
+#else
     r=(r^(r>>1)); r=(r^(r>>2));
     return r;
+#endif
 }
 static inline IData VL_REDXOR_8(IData r) {
+#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS)
+    return __builtin_parityl(r);
+#else
     r=(r^(r>>1)); r=(r^(r>>2)); r=(r^(r>>4));
     return r;
+#endif
 }
 static inline IData VL_REDXOR_16(IData r) {
+#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS)
+    return __builtin_parityl(r);
+#else
     r=(r^(r>>1)); r=(r^(r>>2)); r=(r^(r>>4)); r=(r^(r>>8));
     return r;
+#endif
 }
 static inline IData VL_REDXOR_32(IData r) {
+#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS)
+    return __builtin_parityl(r);
+#else
     r=(r^(r>>1)); r=(r^(r>>2)); r=(r^(r>>4)); r=(r^(r>>8)); r=(r^(r>>16));
     return r;
+#endif
 }
 static inline IData VL_REDXOR_64(QData r) {
+#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS)
+    return __builtin_parityll(r);
+#else
     r=(r^(r>>1)); r=(r^(r>>2)); r=(r^(r>>4)); r=(r^(r>>8)); r=(r^(r>>16)); r=(r^(r>>32));
     return r;
+#endif
+}
+static inline IData VL_REDXOR_W(int words, WDataInP lwp) {
+    IData r = lwp[0];
+    for (int i=1; i < words; i++) r ^= lwp[i];
+    return VL_REDXOR_32(r);
 }
 
 static inline IData VL_CLOG2_I(IData lhs) {
@@ -529,6 +545,7 @@ static inline IData VL_CLOG2_W(int words, WDataInP lwp) {
 
 // EMIT_RULE: VL_COUNTONES_II: oclean = false; lhs clean
 static inline IData VL_COUNTONES_I(IData lhs) {
+    // This is faster than __builtin_popcountl
     IData r = lhs - ((lhs >> 1) & 033333333333) - ((lhs >> 2) & 011111111111);
     r = (r + (r>>3)) & 030707070707;
     r = (r + (r>>6));