From e8a23cf8bc04f3b865063dab0f5624a2a50e697c Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Tue, 26 Jun 2018 07:11:56 -0400 Subject: [PATCH] Fix to ignore Unicode UTF-8 BOM sequences, msg2576. --- Changes | 2 ++ src/V3PreLex.l | 3 +++ test_regress/t/t_unicode.pl | 50 +++++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+) create mode 100755 test_regress/t/t_unicode.pl diff --git a/Changes b/Changes index 6200b2026..8a65773c1 100644 --- a/Changes +++ b/Changes @@ -27,6 +27,8 @@ The contributors that suggested a given feature are shown in []. Thanks! **** Fix define argument stringification (`"), broke since 3.914. [Joe DErrico] +**** Fix to ignore Unicode UTF-8 BOM sequences, msg2576. [HyungKi Jeong] + * Verilator 3.924 2018-06-12 diff --git a/src/V3PreLex.l b/src/V3PreLex.l index c763de013..25e2e4b16 100644 --- a/src/V3PreLex.l +++ b/src/V3PreLex.l @@ -76,10 +76,13 @@ symb ([a-zA-Z_][a-zA-Z0-9_$]*|\\[^ \t\f\r\n]+) symbdef ([a-zA-Z_][a-zA-Z0-9_$]*|\\[^ \t\f\r\n`]+) word [a-zA-Z0-9_]+ drop [\032] +bom \357\273\277 + /**************************************************************/ %% +{bom} { /* Discard the 3-byte UTF-8 BOM (EF BB BF); matched as a sequence, not as single bytes */ } ^{ws}*"`line"{ws}+.*{crnl} { LEXP->lineDirective(yytext); return(VP_LINE); } diff --git a/test_regress/t/t_unicode.pl b/test_regress/t/t_unicode.pl new file mode 100755 index 000000000..ecc4c14c2 --- /dev/null +++ b/test_regress/t/t_unicode.pl @@ -0,0 +1,50 @@ +#!/usr/bin/perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003 by Wilson Snyder. This program is free software; you can +# redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0.
+use IO::File;
+#use Data::Dumper;
+use strict;
+use vars qw($Self);
+
+scenarios(simulator => 1);
+
+# Write to $filename a Verilog module that starts with a UTF-8 BOM and has raw
+# UTF-8 bytes in a comment and in a $write string; dies if the write fails.
+sub gen {
+    my $filename = shift;
+    my $fh = IO::File->new($filename, "w") or die "Cannot open $filename for writing: $!";
+    $fh->print(chr(0xEF).chr(0xBB).chr(0xBF)); # BOM
+    $fh->print("// Bom\n");
+    $fh->print("// Generated by t_unicode.pl\n");
+    $fh->print("module t;\n");
+    $fh->print(" // Chinese "
+               .chr(0xe8).chr(0xaf).chr(0x84).chr(0xe8).chr(0xae).chr(0xba) # Comment
+               ."\n");
+    $fh->print(" initial begin\n");
+    $fh->print(" \$write(\"Hello "
+               .chr(0xe4).chr(0xb8).chr(0x96).chr(0xe7).chr(0x95).chr(0x8c) # World
+               ."\\n\");\n");
+    $fh->print(" \$write(\"*-* All Finished *-*\\n\");\n");
+    $fh->print(" \$finish;\n");
+    $fh->print(" end\n");
+    $fh->print("endmodule\n");
+    $fh->close or die "Cannot close $filename: $!";  # buffered write errors surface at close
+}
+
+top_filename("$Self->{obj_dir}/t_unicode.v");
+gen($Self->{top_filename});  # create the Verilog source before compile() runs
+
+compile();
+
+execute(
+    check_finished => 1,
+    expect => q{Hello \344\270\226\347\225\214.*},  # octal for the bytes E4 B8 96 E7 95 8C that gen() writes
+    );
+
+ok(1);
+1;