From 38ab22bf1d0ba9264f5a5b6f178a39589a2104f7 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Mon, 18 Sep 2017 21:36:18 -0400 Subject: [PATCH] Simplify VL_CONST_W macro generation for faster compiles. --- Changes | 2 + include/verilated.h | 65 +++++++++---- src/V3EmitC.cpp | 74 +++++++++++---- src/V3EmitCInlines.cpp | 40 +------- test_regress/t/t_dist_manifest.pl | 2 +- test_regress/t/t_dist_whitespace.pl | 2 +- test_regress/t/t_emit_constw.v | 140 ++++++++++++++++++++++------ 7 files changed, 226 insertions(+), 99 deletions(-) diff --git a/Changes b/Changes index 53199b05f..a6baf97df 100644 --- a/Changes +++ b/Changes @@ -12,6 +12,8 @@ The contributors that suggested a given feature are shown in []. Thanks! **** Add performance information to --stats file. +**** Simplify VL_CONST_W macro generation for faster compiles. + **** Fix LITENDIAN warning on arrayed cells, bug1202. [Mike Popoloski] **** Fix enum ranges without colons, bug1204. [Mike Popoloski] diff --git a/include/verilated.h b/include/verilated.h index 731a52837..208bc9a02 100644 --- a/include/verilated.h +++ b/include/verilated.h @@ -1895,64 +1895,97 @@ static inline WDataOutP VL_COND_WIWW(int obits, int, int, int, // Constification // VL_CONST_W_#X(int obits, WDataOutP owp, IData data0, .... IData data(#-1)) -// Sets wide vector words to specified constant words, zeros upper data. - +// Sets wide vector words to specified constant words. +// These macros are used when o might represent more words than are given as constants, +// hence all upper words must be zeroed. 
// If changing the number of functions here, also change EMITCINLINES_NUM_CONSTW #define _END(obits,wordsSet) \ for(int i=(wordsSet);inum().toString()); puts(")"); } else if (nodep->isWide()) { - putbs("VL_CONST_W_"); - puts(cvtToStr(VL_WORDS_I(nodep->num().widthMin()))); - puts("X("); - puts(cvtToStr(nodep->widthMin())); - puts(","); - if (!assigntop) { - puts(assignString); - } else if (assigntop->castVarRef()) { - puts(assigntop->hiername()); - puts(assigntop->varp()->name()); - } else { - assigntop->iterateAndNext(*this); + int upWidth = nodep->num().widthMin(); + int chunks = 0; + if (upWidth > EMITC_NUM_CONSTW*VL_WORDSIZE) { + // Output e.g. 8 words in groups of e.g. 8 + chunks = (upWidth-1) / (EMITC_NUM_CONSTW*VL_WORDSIZE); + upWidth %= (EMITC_NUM_CONSTW*VL_WORDSIZE); + if (upWidth == 0) upWidth = (EMITC_NUM_CONSTW*VL_WORDSIZE); } - for (int word=VL_WORDS_I(nodep->num().widthMin())-1; word>0; word--) { - // Only 32 bits - llx + long long here just to appease CPP format warning - ofp()->printf(",0x%08" VL_PRI64 "x", (vluint64_t)(nodep->num().dataWord(word))); + { // Upper e.g. 
8 words + if (chunks) { + putbs("VL_CONSTHI_W_"); + puts(cvtToStr(VL_WORDS_I(upWidth))); + puts("X("); + puts(cvtToStr(nodep->widthMin())); + puts(","); + puts(cvtToStr(chunks*EMITC_NUM_CONSTW*VL_WORDSIZE)); + } else { + putbs("VL_CONST_W_"); + puts(cvtToStr(VL_WORDS_I(upWidth))); + puts("X("); + puts(cvtToStr(nodep->widthMin())); + } + puts(","); + if (!assigntop) { + puts(assignString); + } else if (assigntop->castVarRef()) { + puts(assigntop->hiername()); + puts(assigntop->varp()->name()); + } else { + assigntop->iterateAndNext(*this); + } + for (int word=VL_WORDS_I(upWidth)-1; word>=0; word--) { + // Only 32 bits - llx + long long here just to appease CPP format warning + ofp()->printf(",0x%08" VL_PRI64 "x", (vluint64_t)(nodep->num().dataWord(word+chunks*EMITC_NUM_CONSTW))); + } + puts(")"); + } + for (chunks--; chunks >= 0; chunks--) { + puts(";\n"); + putbs("VL_CONSTLO_W_"); + puts(cvtToStr(EMITC_NUM_CONSTW)); + puts("X("); + puts(cvtToStr(chunks*EMITC_NUM_CONSTW*VL_WORDSIZE)); + puts(","); + if (!assigntop) { + puts(assignString); + } else if (assigntop->castVarRef()) { + puts(assigntop->hiername()); + puts(assigntop->varp()->name()); + } else { + assigntop->iterateAndNext(*this); + } + for (int word=EMITC_NUM_CONSTW-1; word>=0; word--) { + // Only 32 bits - llx + long long here just to appease CPP format warning + ofp()->printf(",0x%08" VL_PRI64 "x", (vluint64_t)(nodep->num().dataWord(word+chunks*EMITC_NUM_CONSTW))); + } + puts(")"); } - ofp()->printf(",0x%08" VL_PRI64 "x)", (vluint64_t)(nodep->num().dataWord(0))); } else if (nodep->isDouble()) { if (int(nodep->num().toDouble()) == nodep->num().toDouble() && nodep->num().toDouble() < 1000 diff --git a/src/V3EmitCInlines.cpp b/src/V3EmitCInlines.cpp index a4ddbe39f..d8de3dbe2 100644 --- a/src/V3EmitCInlines.cpp +++ b/src/V3EmitCInlines.cpp @@ -32,30 +32,15 @@ #include "V3EmitCBase.h" #include "V3Stats.h" -#define EMITCINLINES_NUM_CONSTW 10 // Number of VL_CONST_W_*X's in verilated.h (IE VL_CONST_W_9X is 
last) - //###################################################################### class EmitCInlines : EmitCBaseVisitor { // STATE - vector m_wordWidths; // What sizes are used? // METHODS void emitInt(); // VISITORS - virtual void visit(AstVar* nodep) { - // All wide constants load into variables, so we can just hunt for them - nodep->iterateChildren(*this); - int words = nodep->widthWords(); - if (words >= EMITCINLINES_NUM_CONSTW ) { - if (int(m_wordWidths.size()) <= words) { - m_wordWidths.resize(words+5); - } - ++ m_wordWidths.at(words); - v3Global.needHInlines(true); - } - } virtual void visit(AstBasicDType* nodep) { if (nodep->keyword() == AstBasicDTypeKwd::STRING) { v3Global.needHeavy(true); // #include via verilated_heavy.h when we create symbol file @@ -94,30 +79,7 @@ void EmitCInlines::emitInt() { puts("\n//======================\n\n"); - for (unsigned words=0; words=0; --i) { - puts(",IData d"+cvtToStr(i)); - if (i && (i % 8 == 0)) puts("\n\t"); - } - puts(") {\n"); - puts(" "); - for (int i=words-1; i>=0; --i) { - puts(" o["+cvtToStr(i)+"]=d"+cvtToStr(i)+";"); - if (i && (i % 8 == 0)) puts("\n "); - } - puts("\n"); - puts(" for(int i="+cvtToStr(words)+";i{verbose}; my %files; foreach my $file (split /\s+/,$manifest_files) { next if $file eq ''; diff --git a/test_regress/t/t_dist_whitespace.pl b/test_regress/t/t_dist_whitespace.pl index 67ff3534b..16635129d 100755 --- a/test_regress/t/t_dist_whitespace.pl +++ b/test_regress/t/t_dist_whitespace.pl @@ -41,7 +41,7 @@ sub get_manifest_files { my $manifest_files = `cd $root && make dist-file-list`; $manifest_files =~ s!.*begin-dist-file-list:!!sg; $manifest_files =~ s!end-dist-file-list:.*$!!sg; - print "MF $manifest_files\n"; + print "MF $manifest_files\n" if $Self->{verbose}; my %files; foreach my $file (split /\s+/,$manifest_files) { next if $file eq ''; diff --git a/test_regress/t/t_emit_constw.v b/test_regress/t/t_emit_constw.v index 9c85821fb..40757de4b 100644 --- a/test_regress/t/t_emit_constw.v +++ 
b/test_regress/t/t_emit_constw.v @@ -1,4 +1,9 @@ // DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed into the Public Domain, for any use, +// without warranty, 2015 by Wilson Snyder. + +`define checkhw(gotv,w,expv) do if (gotv[(w)*32+:$bits(expv)] !== (expv)) begin $write("%%Error: %s:%0d: got='h%x exp='h%x\n", `__FILE__,`__LINE__, (gotv[(w)*32+:32]), (expv)); $stop; end while(0); module t (/*AUTOARG*/ // Inputs @@ -10,27 +15,53 @@ module t (/*AUTOARG*/ reg [63:0] crc; reg [63:0] sum; - reg [2*32-1:0] w2; initial w2 = {2 {32'h12345678}}; - reg [9*32-1:0] w9; initial w9 = {9 {32'h12345678}}; - reg [10*32-1:0] w10; initial w10 = {10{32'h12345678}}; - reg [11*32-1:0] w11; initial w11 = {11{32'h12345678}}; - reg [15*32-1:0] w15; initial w15 = {15{32'h12345678}}; - reg [31*32-1:0] w31; initial w31 = {31{32'h12345678}}; - reg [47*32-1:0] w47; initial w47 = {47{32'h12345678}}; - reg [63*32-1:0] w63; initial w63 = {63{32'h12345678}}; + bit [4*32-1:0] w4 = {32'h7c709753, 32'hbc8f6059, 32'h3b0db464, 32'h721a8fad}; + + bit [8*32-2:0] w8m = {31'h7146e1bf, 32'ha8549e42, 32'hca6960bd, 32'h191b7f9b, 32'h93d79866, 32'hf4489e2b, 32'h8e9a3236, 32'h1d2a2d1d}; + + bit [8*32-1:0] w8 = {32'hc211addc, 32'he5d4a057, 32'h5cbf88fe, 32'h42cf42e2, 32'heb584263, 32'ha585f118, 32'h231531c8, 32'hc73f7b06}; + + bit [8*32-0:0] w8p = {1'b1, 32'h096aa54b, 32'h48aae18e, 32'hf9502cea, 32'h518c8b61, 32'h9e8641a2, 32'h0dc0249c, 32'hd421a87a, 32'hb8ee9199}; + + bit [9*32-1:0] w9 = {32'hca800ac1, + 32'h0de4823a, 32'ha51663ac, 32'h96351446, 32'h6b0bbcd5, 32'h4a64b530, 32'h4967d59a, 32'hfcc17292, 32'h57926621}; + + bit [16*32-2:0] w16m = {31'h77ad72c7, 32'h73aa9cbb, 32'h7ecf026d, 32'h985a3ed2, 32'hfe961c1d, 32'h7a01df72, 32'h79e13d71, 32'hb69e2e32, + 32'h09fcbc45, 32'hcfd738c1, 32'hc197ac7c, 32'hc316d727, 32'h903034e4, 32'h92a047d1, 32'h6a5357af, 32'ha82ce9c8}; + + bit [16*32-1:0] w16 = {32'he49548a7, 32'ha02336a2, 32'h2bb48f0d, 32'h9974e098, 32'h34ae644f, 32'hca46dc2c, 
32'h9f71a468, 32'h64ae043e, + 32'h7bc94d66, 32'h57aba588, 32'h5b9bb4fe, 32'hb87ed644, 32'hd34b5b20, 32'h712928de, 32'h4bdbd28e, 32'ha0576784}; + + bit [16*32-0:0] w16p = {1'b1, 32'hd278a306, 32'h374ce262, 32'hb608c88e, 32'h43d3e446, 32'h42e26866, 32'h44c31148, 32'hd3db659f, 32'hb3b84b2e, + 32'h1aa7a184, 32'h73b28538, 32'h6384e801, 32'h98d58e00, 32'h9c1d1429, 32'hb407730e, 32'he974c1fd, 32'he787c302}; + + bit [17*32-1:0] w17 = {32'hf1e322ac, + 32'hbbdbd761, 32'h760fe07d, 32'h3808cb28, 32'haf313051, 32'h37dc63b9, 32'hdddb418b, 32'he65a9d64, 32'hc1b6ab23, + 32'h11131ac1, 32'h0050e0bc, 32'h442e3754, 32'h0eb4556e, 32'hd153064b, 32'h41349f97, 32'hb6f4149f, 32'h34bb1fb1}; + + function [7:0] bytehash (input [32*32-1:0] data); + integer i; + bytehash = 0; + for (i=0; i<32*32; ++i) begin + bytehash = {bytehash[0], bytehash[7:1]} ^ data[i +: 8]; + end + return bytehash; + endfunction // Aggregate outputs into a single result vector - wire [63:0] result = (w2[63:0] - ^ w9[64:1] - ^ w10[65:2] - ^ w11[66:3] - ^ w15[67:4] - ^ w31[68:5] - ^ w47[69:6] - ^ w63[70:7]); + // verilator lint_off WIDTH + wire [63:0] result = (bytehash(w4) + ^ bytehash(w8m) + ^ bytehash(w8) + ^ bytehash(w8p) + ^ bytehash(w9) + ^ bytehash(w16m) + ^ bytehash(w16) + ^ bytehash(w16p) + ^ bytehash(w17)); + // verilator lint_on WIDTH - // What checksum will we end up with -`define EXPECTED_SUM 64'h184cb39122d8c6e3 +`define EXPECTED_SUM 64'hb6fdb64085fc17f5 // Test loop always @ (posedge clk) begin @@ -43,19 +74,76 @@ module t (/*AUTOARG*/ if (cyc==0) begin // Setup crc <= 64'h5aef0c8d_d70a4497; + // verilator lint_off SELRANGE + `checkhw(w4,3,32'h7c709753); + `checkhw(w4,2,32'hbc8f6059); + `checkhw(w4,1,32'h3b0db464); + `checkhw(w4,0,32'h721a8fad); + `checkhw(w8m,7,31'h7146e1bf); + `checkhw(w8m,6,32'ha8549e42); + `checkhw(w8m,5,32'hca6960bd); + `checkhw(w8m,4,32'h191b7f9b); + `checkhw(w8m,3,32'h93d79866); + `checkhw(w8m,2,32'hf4489e2b); + `checkhw(w8m,1,32'h8e9a3236); + `checkhw(w8m,0,32'h1d2a2d1d); + 
`checkhw(w8,7,32'hc211addc); + `checkhw(w8,6,32'he5d4a057); + `checkhw(w8,5,32'h5cbf88fe); + `checkhw(w8,4,32'h42cf42e2); + `checkhw(w8,3,32'heb584263); + `checkhw(w8,2,32'ha585f118); + `checkhw(w8,1,32'h231531c8); + `checkhw(w8,0,32'hc73f7b06); + `checkhw(w8p,8,1'b1); + `checkhw(w8p,7,32'h096aa54b); + `checkhw(w8p,6,32'h48aae18e); + `checkhw(w8p,5,32'hf9502cea); + `checkhw(w8p,4,32'h518c8b61); + `checkhw(w8p,3,32'h9e8641a2); + `checkhw(w8p,2,32'h0dc0249c); + `checkhw(w8p,1,32'hd421a87a); + `checkhw(w8p,0,32'hb8ee9199); + `checkhw(w9,8,32'hca800ac1); + `checkhw(w9,7,32'h0de4823a); + `checkhw(w9,6,32'ha51663ac); + `checkhw(w9,5,32'h96351446); + `checkhw(w9,4,32'h6b0bbcd5); + `checkhw(w9,3,32'h4a64b530); + `checkhw(w9,2,32'h4967d59a); + `checkhw(w9,1,32'hfcc17292); + `checkhw(w9,0,32'h57926621); + `checkhw(w16m,15,31'h77ad72c7); + `checkhw(w16m,14,32'h73aa9cbb); + `checkhw(w16m,13,32'h7ecf026d); + `checkhw(w16m,12,32'h985a3ed2); + `checkhw(w16m,11,32'hfe961c1d); + `checkhw(w16m,10,32'h7a01df72); + `checkhw(w16m,9,32'h79e13d71); + `checkhw(w16m,8,32'hb69e2e32); + `checkhw(w16m,7,32'h09fcbc45); + `checkhw(w16m,6,32'hcfd738c1); + `checkhw(w16m,5,32'hc197ac7c); + `checkhw(w16m,4,32'hc316d727); + `checkhw(w16m,3,32'h903034e4); + `checkhw(w16m,2,32'h92a047d1); + `checkhw(w16m,1,32'h6a5357af); + `checkhw(w16m,0,32'ha82ce9c8); + // verilator lint_on SELRANGE end else if (cyc<10) begin sum <= 64'h0; end else if (cyc<90) begin - w2 <= w2 >> 1; - w9 <= w9 >> 1; - w10 <= w10 >> 1; - w11 <= w11 >> 1; - w15 <= w15 >> 1; - w31 <= w31 >> 1; - w47 <= w47 >> 1; - w63 <= w63 >> 1; + w4 = w4 >>> 1; + w8m = w8m >>> 1; + w8 = w8 >>> 1; + w8p = w8p >>> 1; + w9 = w9 >>> 1; + w16m = w16m >>> 1; + w16 = w16 >>> 1; + w16p = w16p >>> 1; + w17 = w17 >>> 1; end else if (cyc==99) begin $write("[%0t] cyc==%0d crc=%x sum=%x\n",$time, cyc, crc, sum);