#!/usr/bin/env perl # See copyright, etc in below POD section. ###################################################################### use warnings; use strict; use Getopt::Long; use IO::File; use Pod::Usage; use vars qw($Debug); $Debug = 0; my $Opt_File; my $Opt_Time_Per_Char = 0; # rdtsc ticks per char in gantt chart, 0=auto my $opt_vcd = "profile_threads.vcd"; our %Threads; our %Mtasks; our %Global; autoflush STDOUT 1; autoflush STDERR 1; Getopt::Long::config("no_auto_abbrev"); if (! GetOptions( "help" => \&usage, "scale=i" => \$Opt_Time_Per_Char, "debug" => sub { $Debug = 1; }, "vcd=s" => \$opt_vcd, "no-vcd!" => sub { $opt_vcd = undef; }, "<>" => \¶meter, )) { die "%Error: Bad usage, try 'verilator_gantt --help'\n"; } $Opt_File = "profile_threads.dat" if !defined $Opt_File; process($Opt_File); write_vcd($opt_vcd) if defined $opt_vcd; exit(0); ####################################################################### sub usage { pod2usage(-verbose=>2, -exitval=>0, -output=>\*STDOUT); exit(1); # Unreachable } sub parameter { my $param = shift; if (!defined $Opt_File) { $Opt_File = $param; } else { die "%Error: Unknown parameter: $param\n"; } } ####################################################################### sub process { my $filename = shift; read_data($filename); report(); } ####################################################################### sub read_data { my $filename = shift; %Global = (rdtsc_cycle_time => 0); my $fh = IO::File->new ($filename) or die "%Error: $! $filename,"; while (my $line = $fh->getline) { if ($line =~ m/VLPROF mtask\s(\d+)\sstart\s(\d+)\send\s(\d+)\selapsed\s(\d+)\spredict_time\s(\d+)\scpu\s(\d+)\son thread (\d+)/) { my $mtask = $1; my $start = $2; my $end = $3; my $elapsed_time = $4; my $predict_time = $5; my $cpu = $6; my $thread = $7; $Threads{$thread}{$start}{mtask} = $mtask; $Threads{$thread}{$start}{end} = $end; $Threads{$thread}{$start}{cpu} = $cpu; if (!exists $Mtasks{$mtask}{elapsed}) { $Mtasks{$mtask}{elapsed} = 0; } $Mtasks{$mtask}{elapsed} += $elapsed_time; $Mtasks{$mtask}{predict} = $predict_time; $Mtasks{$mtask}{end} = max($Mtasks{$mtask}{end}, $end); } elsif ($line =~ /^VLPROFTHREAD/) {} elsif ($line =~ m/VLPROF arg\s+(\S+)\+([0-9.])\s*$/ || $line =~ m/VLPROF arg\s+(\S+)\s+([0-9.])\s*$/) { $Global{args}{$1} = $2; } elsif ($line =~ m/VLPROF stat\s+(\S+)\s+([0-9.]+)/) { $Global{stats}{$1} = $2; } elsif ($line =~ /^#/) {} elsif ($Debug) { chomp $line; print "Unk: $line\n"; } # TODO -- this is parsing text printed by a client. # Really, verilator proper should generate this # if it's useful... if ($line =~ m/rdtsc time = (\d+) ticks/) { $Global{rdtsc_cycle_time} = $1; } } } sub report { print "Verilator Gantt report\n"; print "\nArgument settings:\n"; foreach my $arg (sort keys %{$Global{args}}) { my $plus = ($arg =~ /^\+/) ? "+" : " "; printf " %s%s%d\n", $arg, $plus, $Global{args}{$arg}; } my $nthreads = scalar keys %Threads; $Global{cpus} = {}; foreach my $thread (keys %Threads) { # Make potentially multiple characters per column foreach my $start (keys %{$Threads{$thread}}) { my $cpu = $Threads{$thread}{$start}{cpu}; my $elapsed = $Threads{$thread}{$start}{end} - $start; $Global{cpus}{$cpu}{cpu_time} += $elapsed; } } my $mt_mtask_time = 0; my $long_mtask_time = 0; my $last_end = 0; foreach my $mtask (keys %Mtasks) { $mt_mtask_time += $Mtasks{$mtask}{elapsed}; $last_end = max($last_end, $Mtasks{$mtask}{end}); $long_mtask_time = max($long_mtask_time, $Mtasks{$mtask}{elapsed}); } $Global{last_end} = $last_end; report_graph(); # If we know cycle time in the same (rdtsc) units, # this will give us an actual utilization number, # (how effectively we keep the cores busy.) # # It also gives us a number we can compare against # serial mode, to estimate the overhead of data sharing, # which will show up in the total elapsed time. (Overhead # of synchronization and scheduling should not.) print "\nAnalysis:\n"; printf " Total threads = %d\n", $nthreads; printf " Total mtasks = %d\n", scalar(keys %Mtasks); printf " Total cpus used = %d\n", scalar(keys %{$Global{cpus}}); printf " Total yields = %d\n", $Global{stats}{yields}; printf " Total eval time = %d rdtsc ticks\n", $Global{last_end}; printf " Longest mtask time = %d rdtsc ticks\n", $long_mtask_time; printf " All-thread mtask time = %d rdtsc ticks\n", $mt_mtask_time; my $long_efficiency = $long_mtask_time/($Global{last_end}); printf " Longest-thread efficiency = %0.1f%%\n", $long_efficiency*100; my $mt_efficiency = $mt_mtask_time/($Global{last_end}*$nthreads); printf " All-thread efficiency = %0.1f%%\n", $mt_efficiency*100; printf " All-thread speedup = %0.1f\n", $mt_efficiency*$nthreads; if ($Global{rdtsc_cycle_time} > 0) { my $ut = $mt_mtask_time / $Global{rdtsc_cycle_time}; print "tot_mtask_cpu=$mt_mtask_time cyc=$Global{rdtsc_cycle_time} ut=$ut\n"; } my @p2e_ratios; my $min_p2e = 1000000; my $min_mtask; my $max_p2e = -1000000; my $max_mtask; foreach my $mtask (sort keys %Mtasks) { if ($Mtasks{$mtask}{elapsed} > 0) { if ($Mtasks{$mtask}{predict} == 0) { $Mtasks{$mtask}{predict} = 1; # don't log(0) below } my $p2e_ratio = log( $Mtasks{$mtask}{predict} / $Mtasks{$mtask}{elapsed} ); #print "log(p2e $mtask) = $p2e_ratio (predict $Mtasks{$mtask}{predict}, elapsed $Mtasks{$mtask}{elapsed})\n"; push @p2e_ratios, $p2e_ratio; if ($p2e_ratio > $max_p2e) { $max_p2e = $p2e_ratio; $max_mtask = $mtask; } if ($p2e_ratio < $min_p2e) { $min_p2e = $p2e_ratio; $min_mtask = $mtask; } } } print "\nStatistics:\n"; print " min log(p2e) = $min_p2e from mtask $min_mtask (predict $Mtasks{$min_mtask}{predict}, elapsed $Mtasks{$min_mtask}{elapsed})\n"; print " max log(p2e) = $max_p2e from mtask $max_mtask (predict $Mtasks{$max_mtask}{predict}, elapsed $Mtasks{$max_mtask}{elapsed})\n"; my $stddev = stddev(\@p2e_ratios); my $mean = mean(\@p2e_ratios); print " mean = " . ($mean) . "\n"; print " stddev = " . ($stddev) . "\n"; print " e ^ stddev = " . exp($stddev). "\n"; print "\n"; } sub report_graph { my $time_per = $Opt_Time_Per_Char; if ($time_per == 0) { $time_per = ($Global{last_end} / 40); # Start with 40 columns while ($time_per > 10) { my ($graph, $conflicts) = _make_graph($time_per); last if !$conflicts; $time_per = int($time_per/2); } # One more step so we can fit more labels $time_per = int($time_per/2); } my ($graph, $conflicts) = _make_graph($time_per); print "\nThread gantt graph:\n"; print " Legend: One character width = $time_per rdtsc ticks\n"; print " Legend: '&' = multiple mtasks in this period (character width)\n"; my $scale = " <-".$Global{last_end}." rdtsc total"; for (my $col = length($scale); # -2 for '->' below $col < ($Global{last_end}/$time_per); ++$col) { $scale .= "-"; } print " $scale->\n"; foreach my $thread (sort keys %{$graph}) { print " t: "; _print_graph_line($graph->{$thread}, ''); } } sub _make_graph { my $time_per = shift; my $graph = {}; # {thread}{column}{char=>'x' or chars=>#} my $conflicts = 0; foreach my $thread (keys %Threads) { # Make potentially multiple characters per column foreach my $start (sort {$a <=> $b} keys %{$Threads{$thread}}) { my $end = $Threads{$thread}{$start}{end}; my $mtask = $Threads{$thread}{$start}{mtask}; my $cpu = $Threads{$thread}{$start}{cpu}; my $startcol = _time_col($time_per, $start); my $endcol = _time_col($time_per, $end); my $label = "["; $label .= "$cpu"; # Maybe make optional in future my $width = $endcol - $startcol + 1; while (length($label) < ($width-1)) { # -1 for ']' $label .= "-"; } $label .= "]"; $graph->{$thread}[$startcol]{char} .= $label; } if ($Debug) { print "# Multicol: "; _print_graph_line($graph->{$thread}, '|'); } # Expand line to one char per column for (my $col = 0; $col <= $#{$graph->{$thread}}; ++$col) { if (my $chars = $graph->{$thread}[$col]{char}) { my $ok = 1; for (my $coladd = 1; $coladd{$thread}[$col + $coladd]{char}) { $ok = 0; last; } } if (!$ok) { if ($chars =~ /\[.*\[/) { # Two begins or more $conflicts++; $graph->{$thread}[$col]{char} = "&"; } else { $graph->{$thread}[$col]{char} = "["; } for (my $coladd = 1; $coladd{$thread}[$col + $coladd]{char}) { last; } else { $graph->{$thread}[$col + $coladd]{char} = 'x'; } } } else { my $coladd = 0; foreach my $char (split //, $chars) { $graph->{$thread}[$col+$coladd]{char} = $char; ++$coladd; } } } } if ($Debug) { print "# Singlcol: "; _print_graph_line($graph->{$thread}, '|'); } } print "# Conflicts $conflicts\n" if $Debug; return ($graph, $conflicts); } sub _print_graph_line { my $graph_thread = shift; my $sep = shift; for (my $col = 0; $col <= $#{$graph_thread}; ++$col) { my $c = $graph_thread->[$col]{char}; $c=' ' if !defined $c; print $c, $sep; } print "\n"; } sub _time_col { my $time_per = shift; my $time = shift; return int($time/$time_per); } ####################################################################### sub write_vcd { my $filename = shift; print "Writing $filename\n"; my $fh = IO::File->new(">$filename") or die "%Error: $! $filename,"; my $vcd = {values => {}, # {