From 8d61cd9029ad03df4a6e9b2fad36aedb11ffe673 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Thu, 4 Jun 2020 20:44:35 -0400 Subject: [PATCH] New fastcov internal coverage scheme --- nodist/code_coverage | 144 +++++++-- nodist/fastcov.py | 718 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 828 insertions(+), 34 deletions(-) create mode 100755 nodist/fastcov.py diff --git a/nodist/code_coverage b/nodist/code_coverage index 07cc22628..a96489dce 100755 --- a/nodist/code_coverage +++ b/nodist/code_coverage @@ -6,6 +6,7 @@ use warnings; use Cwd; use File::Copy qw(cp); use File::Path qw(mkpath); +use File::Spec; use FindBin qw($RealBin); use Getopt::Long; use Parallel::Forker; @@ -16,7 +17,7 @@ use strict; use vars qw($Debug); our $Opt_Stop = 1; -our $Opt_Fastcov = 0; +our $Opt_Fastcov = 1; our $Exclude_Line_Regexp; our $Remove_Gcda_Regexp; @@ -35,6 +36,7 @@ our $Opt_Hashset; our $opt_stages = ''; our $Opt_Scenarios; our %Opt_Stages; +our @Opt_Tests; autoflush STDOUT 1; autoflush STDERR 1; @@ -45,8 +47,10 @@ if (! GetOptions( "<>" => sub { die "%Error: Unknown parameter: $_[0]\n"; }, "fastcov!" => \$Opt_Fastcov, # use fastcov, not documented, for debug "scenarios=s" => \$Opt_Scenarios, # driver.pl scenarios + "stage=s" => \$opt_stages, # starting stage number "stages=s" => \$opt_stages, # starting stage number "stop!" => \$Opt_Stop, # stop/do not stop on error in tests + "test=s@" => \@Opt_Tests, # test name )) { die "%Error: Bad usage, try 'code_coverage --help'\n"; } @@ -82,7 +86,8 @@ sub test { print "Stage 1: configure (coverage on)\n"; run("make distclean || true"); run("autoconf"); - run("./configure --enable-longtests CXX='g++ --coverage'"); + # Exceptions can pollute the branch coverage data + run("./configure --enable-longtests CXX='g++ --coverage -fno-exceptions -DVL_GCOV'"); travis_fold_end(); } @@ -90,7 +95,7 @@ sub test { travis_fold_start("build"); print "Stage 2: build\n"; my $nproc = Unix::Processors->new->max_online; - run("make -k -j $nproc"); + run("make -k -j $nproc VERILATOR_NO_OPT_BUILD=1"); # The optimized versions will not collect good coverage, overwrite them run("cp bin/verilator_bin_dbg bin/verilator_bin"); run("cp bin/verilator_coverage_bin_dbg bin/verilator_coverage_bin"); @@ -100,12 +105,21 @@ sub test { if ($Opt_Stages{3}) { travis_fold_start("test"); print "Stage 3: make tests (with coverage on)\n"; - run("make examples") - if !$Opt_Scenarios || $Opt_Scenarios =~ /dist/i; - run("make test_regress" - . ($Opt_Scenarios ? " SCENARIOS='".$Opt_Scenarios."'" : "") - . ($Opt_Hashset ? " DRIVER_HASHSET='--hashset=".$Opt_Hashset."'" : "") - . ($Opt_Stop ? '' : ' || true')); + if ($#Opt_Tests < 0) { + run("make examples VERILATOR_NO_OPT_BUILD=1") + if !$Opt_Scenarios || $Opt_Scenarios =~ /dist/i; + run("make test_regress VERILATOR_NO_OPT_BUILD=1" + . ($Opt_Scenarios ? " SCENARIOS='".$Opt_Scenarios."'" : "") + . ($Opt_Hashset ? " DRIVER_HASHSET='--hashset=".$Opt_Hashset."'" : "") + . ($Opt_Stop ? '' : ' || true')); + } else { + foreach my $test (@Opt_Tests) { + if (! -f $test && -f "test_regress/t/${test}") { + $test = "test_regress/t/${test}"; + } + run($test); + } + } travis_fold_end(); } @@ -120,7 +134,6 @@ sub test { foreach my $dat (split '\n', $dats) { $dats{$dat} = 1; } - my %gcnos; foreach my $dat (sort keys %dats) { (my $gcno = $dat) =~ s!\.gcda$!.gcno!; if ($dat =~ /$Remove_Gcda_Regexp/) { @@ -131,10 +144,12 @@ sub test { delete $dats{$dat}; next; } + } + $dats = `find . 
-print | grep .gcno`; + my %gcnos; + foreach my $gcno (split '\n', $dats) { (my $gbase = $gcno) =~ s!.*/!!; - if (!$gcnos{$gbase} && -r $gcno) { - $gcnos{$gbase} = $gcno; - } + $gcnos{$gbase} = File::Spec->rel2abs($gcno); } # We need a matching .gcno for every .gcda, try to find a matching file elsewhere foreach my $dat (sort keys %dats) { @@ -142,7 +157,8 @@ sub test { (my $gbase = $gcno) =~ s!.*/!!; if (!-r $gcno) { if ($gcnos{$gbase}) { - cp($gcnos{$gbase}, $gcno); + symlink($gcnos{$gbase}, $gcno) + or die "%Error: can't ln -s $gcnos{$gbase} $gcno,"; } else { warn "MISSING .gcno for a .gcda: $gcno\n"; } @@ -155,7 +171,12 @@ sub test { travis_fold_start("fastcov"); # Must run in root directory to find all files mkpath($cc_dir); - run("${RealBin}/fastcov.py -X --lcov --exclude /usr -o ${cc_dir}/app_fastcov.info"); + #run("${RealBin}/fastcov.py -b -c src/obj_dbg -X". + # " --exclude /usr --exclude test_regress" + # ." -o ${cc_dir}/app_total.json"); + run("${RealBin}/fastcov.py -b -c src/obj_dbg -X --lcov". + " --exclude /usr --exclude test_regress" + ." -o ${cc_dir}/app_total.info"); travis_fold_end(); } @@ -182,13 +203,14 @@ sub test { if ($Opt_Stages{6}) { travis_fold_start("clone"); - # lcov doesn't have a control file to override single lines, so replicate the sources + # No control file to override single lines, so replicate the sources + # Also lets us see the insertion markers in the HTML source res print "Stage 6: Clone sources under $cc_dir\n"; clone_sources($cc_dir); travis_fold_end(); } - if ($Opt_Stages{8}) { + if ($Opt_Stages{8} && !$Opt_Fastcov) { travis_fold_start("copy"); print "Stage 8: Copy .gcno files\n"; my $dats = `find . -print | grep .gcno`; @@ -201,14 +223,12 @@ sub test { travis_fold_end(); } - if ($Opt_Stages{10}) { + if ($Opt_Stages{10} && !$Opt_Fastcov) { travis_fold_start("combine"); print "Stage 10: Combine data files\n"; - run("cd $cc_dir ; lcov -c -i -d src/obj_dbg -o app_base.info"); - run("cd $cc_dir ; lcov -a app_base.info -o app_total.info"); - if ($Opt_Fastcov) { - run("cd $cc_dir ; lcov -a app_base.info -a app_fastcov.info -o app_total.info"); - } else { + { + run("cd $cc_dir ; lcov -c -i -d src/obj_dbg -o app_base.info"); + run("cd $cc_dir ; lcov -a app_base.info -o app_total.info"); my $infos = `cd $cc_dir ; find info -print | grep .info`; my $comb = ""; my @infos = (sort (split /\n/, $infos)); @@ -228,25 +248,46 @@ sub test { if ($Opt_Stages{11}) { travis_fold_start("dirs"); print "Stage 11: Cleanup paths\n"; - cleanup_abs_paths($cc_dir, "$cc_dir/app_total.info", "$cc_dir/app_total.info"); + if ($Opt_Fastcov) { + cleanup_abs_paths_info($cc_dir, "$cc_dir/app_total.info", "$cc_dir/app_total.info"); + #cleanup_abs_paths_json($cc_dir, "$cc_dir/app_total.json", "$cc_dir/app_total.json"); + } else { + cleanup_abs_paths_info($cc_dir, "$cc_dir/app_total.info", "$cc_dir/app_total.info"); + } travis_fold_end(); } if ($Opt_Stages{12}) { travis_fold_start("filter"); print "Stage 12: Filter processed source files\n"; - my $cmd = ''; - foreach my $glob (@Remove_Sources) { - $cmd .= " '$glob'"; + my $inc = ''; + foreach my $glob (@Source_Globs) { + foreach my $infile (glob $glob) { + $inc .= " '$infile'"; + } + } + my $exc = ''; + foreach my $glob (@Remove_Sources) { + # Fastcov does exact match not globbing at present + # Lcov requires whole path match so needs the glob + $glob =~ s!^\*!! if $Opt_Fastcov; + $glob =~ s!\*$!! 
if $Opt_Fastcov; + $exc .= " '$glob'"; + } + if ($Opt_Fastcov) { + $inc = "--include ".$inc if $inc ne ''; + $exc = "--exclude ".$exc if $exc ne ''; + run("cd $cc_dir ; ${RealBin}/fastcov.py -C app_total.info ${inc} ${exc} -x --lcov -o app_total_f.info"); + } else { + run("cd $cc_dir ; lcov --remove app_total.info $exc -o app_total_f.info"); } - run("cd $cc_dir ; lcov --remove app_total.info $cmd -o app_total.info"); travis_fold_end(); } if ($Opt_Stages{17}) { travis_fold_start("report"); print "Stage 17: Create HTML\n"; - run("cd $cc_dir ; genhtml app_total.info --demangle-cpp" + run("cd $cc_dir ; genhtml app_total_f.info --demangle-cpp" ." --rc lcov_branch_coverage=1 --rc genhtml_hi_limit=100 --output-directory html"); travis_fold_end(); } @@ -262,6 +303,9 @@ sub test { if ($Opt_Stages{19}) { print "*-* All Finished *-*\n"; + print "\n"; + print "* See report in ${cc_dir}/html/index.html\n"; + print "* Remember to make distclean && ./configure before working on non-coverage\n"; } } @@ -281,21 +325,29 @@ sub clone_sources { while (defined(my $line = $fh->getline)) { $lineno++; chomp $line; - if ($line !~ m!// LCOV_EXCL_LINE! + if ($line =~ /LCOV_EXCL_LINE/) { + $line .= " LCOV_EXCL_BR_LINE"; + } + elsif ($line =~ /LCOV_EXCL_START/) { + $line .= " LCOV_EXCL_BR_START"; + } + elsif ($line =~ /LCOV_EXCL_STOP/) { + $line .= " LCOV_EXCL_BR_STOP"; + } + elsif ($line !~ m!// LCOV_EXCL_LINE! && $line =~ /$Exclude_Line_Regexp/) { - $line .= " //code_coverage: // LCOV_EXCL_LINE"; + $line .= " //code_coverage: // LCOV_EXCL_LINE LCOV_EXCL_BR_LINE"; $excluded_lines++; #print "$infile:$lineno: $line"; - } else { } $ofh->print("$line\n"); } } } - print "Source code lines automatically LCOV_EXCL_LINE'ed: $excluded_lines\n"; + print "Number of source lines automatically LCOV_EXCL_LINE'ed: $excluded_lines\n"; } -sub cleanup_abs_paths { +sub cleanup_abs_paths_info { my $cc_dir = shift; my $infile = shift; my $outfile = shift; @@ -314,6 +366,25 @@ sub cleanup_abs_paths { $ofh->print(@lines); } +sub cleanup_abs_paths_json { + my $cc_dir = shift; + my $infile = shift; + my $outfile = shift; + # Handcrafted cleanup, alternative would be to deserialize/serialize JSON + # But JSON::Parse not installed by default + # JSON::PP more likely to be installed, but slower + my $fh = IO::File->new("<$infile") or die "%Error: $! $infile,"; + my @lines; + while (defined(my $line = $fh->getline)) { + $line =~ s!"$ENV{VERILATOR_ROOT}/!"!g; + $line =~ s!"$cc_dir/!"!g; + $line =~ s!obj_dbg/verilog.y$!verilog.y!g; + push @lines, $line; + } + my $ofh = IO::File->new(">$outfile") or die "%Error: $! $outfile,"; + $ofh->print(@lines); +} + ####################################################################### # .dat file callbacks @@ -397,6 +468,11 @@ Runs a specific stage or range of stages (see the script). Do not stop collecting data if tests fail. +=item --test I + +Instead of normal regressions, run the specified test. May be specified +multiple times for multiple tests. 
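+
+For example, "--test t_mytest.pl" (a hypothetical test name) runs
+test_regress/t/t_mytest.pl.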
+ =back =head1 DISTRIBUTION diff --git a/nodist/fastcov.py b/nodist/fastcov.py new file mode 100755 index 000000000..48d35cf96 --- /dev/null +++ b/nodist/fastcov.py @@ -0,0 +1,718 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# Copyright 2018-present, Bryan Gillespie +""" + Author: Bryan Gillespie + https://github.com/RPGillespie6/fastcov + + A massively parallel gcov wrapper for generating intermediate coverage formats fast + + The goal of fastcov is to generate code coverage intermediate formats as fast as possible, + even for large projects with hundreds of gcda objects. The intermediate formats may then be + consumed by a report generator such as lcov's genhtml, or a dedicated frontend such as coveralls. + + Sample Usage: + $ cd build_dir + $ ./fastcov.py --zerocounters + $ + $ ./fastcov.py --exclude /usr/include test/ --lcov -o report.info + $ genhtml -o code_coverage report.info +""" + +import re +import os +import sys +import glob +import json +import time +import logging +import argparse +import threading +import subprocess +import multiprocessing + +FASTCOV_VERSION = (1,7) +MINIMUM_PYTHON = (3,5) +MINIMUM_GCOV = (9,0,0) + +# Interesting metrics +START_TIME = time.monotonic() +GCOVS_TOTAL = 0 +GCOVS_SKIPPED = 0 + +# Disable all logging in case developers are using this as a module +logging.disable(level=logging.CRITICAL) + +class FastcovFormatter(logging.Formatter): + def format(self, record): + record.levelname = record.levelname.lower() + log_message = super(FastcovFormatter, self).format(record) + return "[{:.3f}s] {}".format(stopwatch(), log_message) + +def chunks(l, n): + """Yield successive n-sized chunks from l.""" + for i in range(0, len(l), n): + yield l[i:i + n] + +def incrementCounters(total, skipped): + global GCOVS_TOTAL + global GCOVS_SKIPPED + GCOVS_TOTAL += total + GCOVS_SKIPPED += skipped + +def stopwatch(): + """Return number of seconds since last time this was called""" + global START_TIME + end_time = time.monotonic() + delta = end_time - START_TIME + START_TIME = end_time + return delta + +def parseVersionFromLine(version_str): + """Given a string containing a dotted integer version, parse out integers and return as tuple""" + version = re.search(r'(\d+\.\d+\.\d+)', version_str) + + if not version: + return (0,0,0) + + return tuple(map(int, version.group(1).split("."))) + +def getGcovVersion(gcov): + p = subprocess.Popen([gcov, "-v"], stdout=subprocess.PIPE) + output = p.communicate()[0].decode('UTF-8') + p.wait() + return parseVersionFromLine(output.split("\n")[0]) + +def removeFiles(files): + for file in files: + os.remove(file) + +def getFilteredCoverageFiles(coverage_files, exclude): + def excludeGcda(gcda): + for ex in exclude: + if ex in gcda: + logging.debug("Omitting %s due to '--exclude-gcda %s'", gcda, ex) + return False + return True + return list(filter(excludeGcda, coverage_files)) + +def findCoverageFiles(cwd, coverage_files, use_gcno): + coverage_type = "user provided" + if not coverage_files: + coverage_type = "gcno" if use_gcno else "gcda" + coverage_files = glob.glob(os.path.join(os.path.abspath(cwd), "**/*." 
+ coverage_type), recursive=True) + + logging.info("Found {} coverage files ({})".format(len(coverage_files), coverage_type)) + logging.debug("Coverage files found:\n %s", "\n ".join(coverage_files)) + return coverage_files + +def gcovWorker(data_q, metrics_q, args, chunk, gcov_filter_options): + base_report = {"sources": {}} + gcovs_total = 0 + gcovs_skipped = 0 + + gcov_args = "-it" + if args.branchcoverage or args.xbranchcoverage: + gcov_args += "b" + + p = subprocess.Popen([args.gcov, gcov_args] + chunk, cwd=args.cdirectory, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) + for line in iter(p.stdout.readline, b''): + intermediate_json = json.loads(line.decode(sys.stdout.encoding)) + intermediate_json_files = processGcovs(args.cdirectory, intermediate_json["files"], gcov_filter_options) + for f in intermediate_json_files: + distillSource(f, base_report["sources"], args.test_name, args.xbranchcoverage) + gcovs_total += len(intermediate_json["files"]) + gcovs_skipped += len(intermediate_json["files"]) - len(intermediate_json_files) + + p.wait() + data_q.put(base_report) + metrics_q.put((gcovs_total, gcovs_skipped)) + +def processGcdas(args, coverage_files, gcov_filter_options): + chunk_size = max(args.minimum_chunk, int(len(coverage_files) / args.jobs) + 1) + + processes = [] + data_q = multiprocessing.Queue() + metrics_q = multiprocessing.Queue() + for chunk in chunks(coverage_files, chunk_size): + p = multiprocessing.Process(target=gcovWorker, args=(data_q, metrics_q, args, chunk, gcov_filter_options)) + processes.append(p) + p.start() + + logging.info("Spawned {} gcov processes, each processing at most {} coverage files".format(len(processes), chunk_size)) + + fastcov_jsons = [] + for p in processes: + fastcov_jsons.append(data_q.get()) + incrementCounters(*metrics_q.get()) + + for p in processes: + p.join() + + base_fastcov = fastcov_jsons.pop() + for fj in fastcov_jsons: + combineReports(base_fastcov, fj) + + return base_fastcov + +def shouldFilterSource(source, gcov_filter_options): + """Returns true if the provided source file should be filtered due to CLI options, otherwise returns false""" + # If explicit sources were passed, check for match + if gcov_filter_options["sources"]: + if source not in gcov_filter_options["sources"]: + logging.debug("Filtering coverage for '%s' due to option '--source-files'", source) + return True + + # Check exclude filter + for ex in gcov_filter_options["exclude"]: + if ex in source: + logging.debug("Filtering coverage for '%s' due to option '--exclude %s'", source, ex) + return True + + # Check include filter + if gcov_filter_options["include"]: + included = False + for inc in gcov_filter_options["include"]: + if inc in source: + included = True + break + + if not included: + logging.debug("Filtering coverage for '%s' due to option '--include %s'", source, " ".join(gcov_filter_options["include"])) + return True + + return False + +def filterFastcov(fastcov_json, args): + logging.info("Performing filtering operations (if applicable)") + gcov_filter_options = getGcovFilterOptions(args) + for source in list(fastcov_json["sources"].keys()): + if shouldFilterSource(source, gcov_filter_options): + del fastcov_json["sources"][source] + +def processGcov(cwd, gcov, files, gcov_filter_options): + # Add absolute path + gcov["file_abs"] = os.path.abspath(os.path.join(cwd, gcov["file"])) + + if shouldFilterSource(gcov["file_abs"], gcov_filter_options): + return + + files.append(gcov) + logging.debug("Accepted coverage for '%s'", gcov["file_abs"]) + +def 
processGcovs(cwd, gcov_files, gcov_filter_options): + files = [] + for gcov in gcov_files: + processGcov(cwd, gcov, files, gcov_filter_options) + return files + +def dumpBranchCoverageToLcovInfo(f, branches): + branch_miss = 0 + branch_found = 0 + brda = [] + for line_num, branch_counts in branches.items(): + for i, count in enumerate(branch_counts): + # Branch (, , , ) + brda.append((line_num, int(i/2), i, count)) + branch_miss += int(count == 0) + branch_found += 1 + for v in sorted(brda): + f.write("BRDA:{},{},{},{}\n".format(*v)) + f.write("BRF:{}\n".format(branch_found)) # Branches Found + f.write("BRH:{}\n".format(branch_found - branch_miss)) # Branches Hit + +def dumpToLcovInfo(fastcov_json, output): + with open(output, "w") as f: + sources = fastcov_json["sources"] + for sf in sorted(sources.keys()): + for tn in sorted(sources[sf].keys()): + data = sources[sf][tn] + f.write("TN:{}\n".format(tn)) #Test Name - used mainly in conjuction with genhtml --show-details + f.write("SF:{}\n".format(sf)) #Source File + + fn_miss = 0 + fn = [] + fnda = [] + for function, fdata in data["functions"].items(): + fn.append((fdata["start_line"], function)) # Function Start Line + fnda.append((fdata["execution_count"], function)) # Function Hits + fn_miss += int(fdata["execution_count"] == 0) + # NOTE: lcov sorts FN, but not FNDA. + for v in sorted(fn): + f.write("FN:{},{}\n".format(*v)) + for v in sorted(fnda): + f.write("FNDA:{},{}\n".format(*v)) + f.write("FNF:{}\n".format(len(data["functions"]))) #Functions Found + f.write("FNH:{}\n".format((len(data["functions"]) - fn_miss))) #Functions Hit + + if data["branches"]: + dumpBranchCoverageToLcovInfo(f, data["branches"]) + + line_miss = 0 + da = [] + for line_num, count in data["lines"].items(): + da.append((line_num, count)) + line_miss += int(count == 0) + for v in sorted(da): + f.write("DA:{},{}\n".format(*v)) # Line + f.write("LF:{}\n".format(len(data["lines"]))) #Lines Found + f.write("LH:{}\n".format((len(data["lines"]) - line_miss))) #Lines Hit + f.write("end_of_record\n") + +def getSourceLines(source, fallback_encodings=[]): + """Return a list of lines from the provided source, trying to decode with fallback encodings if the default fails""" + default_encoding = sys.getdefaultencoding() + for encoding in [default_encoding] + fallback_encodings: + try: + with open(source, encoding=encoding) as f: + return f.readlines() + except UnicodeDecodeError: + pass + + logging.warning("Could not decode '{}' with {} or fallback encodings ({}); ignoring errors".format(source, default_encoding, ",".join(fallback_encodings))) + with open(source, errors="ignore") as f: + return f.readlines() + +def exclMarkerWorker(fastcov_sources, chunk, exclude_branches_sw, include_branches_sw, fallback_encodings): + for source in chunk: + start_line = 0 + end_line = 0 + # Start enumeration at line 1 because the first line of the file is line 1 not 0 + for i, line in enumerate(getSourceLines(source, fallback_encodings), 1): + # Cycle through test names (likely only 1) + for test_name in fastcov_sources[source]: + fastcov_data = fastcov_sources[source][test_name] + + # Build line to function dict so can quickly delete by line number + line_to_func = {} + for f in fastcov_data["functions"].keys(): + l = fastcov_data["functions"][f]["start_line"] + if l not in line_to_func: + line_to_func[l] = {} + line_to_func[l][f] = f + + if i in fastcov_data["branches"]: + del_exclude_br = exclude_branches_sw and any(line.lstrip().startswith(e) for e in exclude_branches_sw) + del_include_br 
= include_branches_sw and all(not line.lstrip().startswith(e) for e in include_branches_sw) + if del_exclude_br or del_include_br: + del fastcov_data["branches"][i] + + if "LCOV_EXCL" not in line: + continue + + if "LCOV_EXCL_LINE" in line: + for key in ["lines", "branches"]: + if i in fastcov_data[key]: + del fastcov_data[key][i] + if i in line_to_func: + for key in line_to_func[i]: + if fastcov_data["functions"][key]: + del fastcov_data["functions"][key] + elif "LCOV_EXCL_START" in line: + start_line = i + elif "LCOV_EXCL_STOP" in line: + end_line = i + + if not start_line: + end_line = 0 + continue + + for key in ["lines", "branches"]: + for line_num in list(fastcov_data[key].keys()): + if start_line <= line_num <= end_line: + del fastcov_data[key][line_num] + + for i in range(start_line, end_line): + if i in line_to_func: + for key in line_to_func[i]: + if fastcov_data["functions"][key]: + del fastcov_data["functions"][key] + + start_line = end_line = 0 + elif "LCOV_EXCL_BR_LINE" in line: + if i in fastcov_data["branches"]: + del fastcov_data["branches"][i] + +def scanExclusionMarkers(fastcov_json, jobs, exclude_branches_sw, include_branches_sw, min_chunk_size, fallback_encodings): + chunk_size = max(min_chunk_size, int(len(fastcov_json["sources"]) / jobs) + 1) + + threads = [] + for chunk in chunks(list(fastcov_json["sources"].keys()), chunk_size): + t = threading.Thread(target=exclMarkerWorker, args=(fastcov_json["sources"], chunk, exclude_branches_sw, include_branches_sw, fallback_encodings)) + threads.append(t) + t.start() + + logging.info("Spawned {} threads each scanning at most {} source files".format(len(threads), chunk_size)) + for t in threads: + t.join() + +def distillFunction(function_raw, functions): + function_name = function_raw["name"] + # NOTE: need to explicitly cast all counts coming from gcov to int - this is because gcov's json library + # will pass as scientific notation (i.e. 
12+e45) + start_line = int(function_raw["start_line"]) + execution_count = int(function_raw["execution_count"]) + if function_name not in functions: + functions[function_name] = { + "start_line": start_line, + "execution_count": execution_count + } + else: + functions[function_name]["execution_count"] += execution_count + +def emptyBranchSet(branch1, branch2): + return (branch1["count"] == 0 and branch2["count"] == 0) + +def matchingBranchSet(branch1, branch2): + return (branch1["count"] == branch2["count"]) + +def filterExceptionalBranches(branches): + filtered_branches = [] + exception_branch = False + for i in range(0, len(branches), 2): + if i+1 >= len(branches): + filtered_branches.append(branches[i]) + break + + # Filter exceptional branch noise + if branches[i+1]["throw"]: + exception_branch = True + continue + + # Filter initializer list noise + if exception_branch and emptyBranchSet(branches[i], branches[i+1]) and len(filtered_branches) >= 2 and matchingBranchSet(filtered_branches[-1], filtered_branches[-2]): + return [] + + filtered_branches.append(branches[i]) + filtered_branches.append(branches[i+1]) + + return filtered_branches + +def distillLine(line_raw, lines, branches, include_exceptional_branches): + line_number = int(line_raw["line_number"]) + count = int(line_raw["count"]) + if line_number not in lines: + lines[line_number] = count + else: + lines[line_number] += count + + # Filter out exceptional branches by default unless requested otherwise + if not include_exceptional_branches: + line_raw["branches"] = filterExceptionalBranches(line_raw["branches"]) + + # Increment all branch counts + for i, branch in enumerate(line_raw["branches"]): + if line_number not in branches: + branches[line_number] = [] + blen = len(branches[line_number]) + glen = len(line_raw["branches"]) + if blen < glen: + branches[line_number] += [0] * (glen - blen) + branches[line_number][i] += int(branch["count"]) + +def distillSource(source_raw, sources, test_name, include_exceptional_branches): + source_name = source_raw["file_abs"] + if source_name not in sources: + sources[source_name] = { + test_name: { + "functions": {}, + "branches": {}, + "lines": {} + } + } + + for function in source_raw["functions"]: + distillFunction(function, sources[source_name][test_name]["functions"]) + + for line in source_raw["lines"]: + distillLine(line, sources[source_name][test_name]["lines"], sources[source_name][test_name]["branches"], include_exceptional_branches) + +def dumpToJson(intermediate, output): + with open(output, "w") as f: + json.dump(intermediate, f) + +def getGcovFilterOptions(args): + return { + "sources": set([os.path.abspath(s) for s in args.sources]), #Make paths absolute, use set for fast lookups + "include": args.includepost, + "exclude": args.excludepost, + } + +def addDicts(dict1, dict2): + """Add dicts together by value. i.e. addDicts({"a":1,"b":0}, {"a":2}) == {"a":3,"b":0}""" + result = {k:v for k,v in dict1.items()} + for k,v in dict2.items(): + if k in result: + result[k] += v + else: + result[k] = v + + return result + +def addLists(list1, list2): + """Add lists together by value. i.e. 
addLists([1,1], [2,2]) == [3,3]""" + # Find big list and small list + blist, slist = list(list2), list(list1) + if len(list1) > len(list2): + blist, slist = slist, blist + + # Overlay small list onto big list + for i, b in enumerate(slist): + blist[i] += b + + return blist + +def combineReports(base, overlay): + for source, scov in overlay["sources"].items(): + # Combine Source Coverage + if source not in base["sources"]: + base["sources"][source] = scov + continue + + for test_name, tcov in scov.items(): + # Combine Source Test Name Coverage + if test_name not in base["sources"][source]: + base["sources"][source][test_name] = tcov + continue + + # Drill down and create convenience variable + base_data = base["sources"][source][test_name] + + # Combine Line Coverage + base_data["lines"] = addDicts(base_data["lines"], tcov["lines"]) + + # Combine Branch Coverage + for branch, cov in tcov["branches"].items(): + if branch not in base_data["branches"]: + base_data["branches"][branch] = cov + else: + base_data["branches"][branch] = addLists(base_data["branches"][branch], cov) + + # Combine Function Coverage + for function, cov in tcov["functions"].items(): + if function not in base_data["functions"]: + base_data["functions"][function] = cov + else: + base_data["functions"][function]["execution_count"] += cov["execution_count"] + +def parseInfo(path): + """Parse an lcov .info file into fastcov json""" + fastcov_json = { + "sources": {} + } + + with open(path) as f: + for line in f: + if line.startswith("TN:"): + current_test_name = line[3:].strip() + elif line.startswith("SF:"): + current_sf = line[3:].strip() + fastcov_json["sources"][current_sf] = { + current_test_name: { + "functions": {}, + "branches": {}, + "lines": {}, + } + } + current_data = fastcov_json["sources"][current_sf][current_test_name] + elif line.startswith("FN:"): + line_num, function_name = line[3:].strip().split(",") + current_data["functions"][function_name] = {} + current_data["functions"][function_name]["start_line"] = int(line_num) + elif line.startswith("FNDA:"): + count, function_name = line[5:].strip().split(",") + current_data["functions"][function_name]["execution_count"] = int(count) + elif line.startswith("DA:"): + line_num, count = line[3:].strip().split(",") + current_data["lines"][line_num] = int(count) + elif line.startswith("BRDA:"): + branch_tokens = line[5:].strip().split(",") + line_num, count = branch_tokens[0], branch_tokens[-1] + if line_num not in current_data["branches"]: + current_data["branches"][line_num] = [] + current_data["branches"][line_num].append(int(count)) + + return fastcov_json + +def convertKeysToInt(report): + for source in report["sources"].keys(): + for test_name in report["sources"][source].keys(): + report_data = report["sources"][source][test_name] + report_data["lines"] = {int(k):v for k,v in report_data["lines"].items()} + report_data["branches"] = {int(k):v for k,v in report_data["branches"].items()} + +def parseAndCombine(paths): + base_report = {} + + for path in paths: + if path.endswith(".json"): + with open(path) as f: + report = json.load(f) + elif path.endswith(".info"): + report = parseInfo(path) + else: + sys.stderr.write("Currently only fastcov .json and lcov .info supported for combine operations, aborting due to {}...\n".format(path)) + sys.exit(3) + + # In order for sorting to work later when we serialize, + # make sure integer keys are int + convertKeysToInt(report) + + if not base_report: + base_report = report + logging.info("Setting {} as base 
report".format(path)) + else: + combineReports(base_report, report) + logging.info("Adding {} to base report".format(path)) + + return base_report + +def getCombineCoverage(args): + logging.info("Performing combine operation") + fastcov_json = parseAndCombine(args.combine) + filterFastcov(fastcov_json, args) + return fastcov_json + +def getGcovCoverage(args): + # Need at least python 3.5 because of use of recursive glob + checkPythonVersion(sys.version_info[0:2]) + + # Need at least gcov 9.0.0 because that's when gcov JSON and stdout streaming was introduced + checkGcovVersion(getGcovVersion(args.gcov)) + + # Get list of gcda files to process + coverage_files = findCoverageFiles(args.directory, args.coverage_files, args.use_gcno) + + # If gcda/gcno filtering is enabled, filter them out now + if args.excludepre: + coverage_files = getFilteredCoverageFiles(coverage_files, args.excludepre) + logging.info("Found {} coverage files after filtering".format(len(coverage_files))) + + # We "zero" the "counters" by simply deleting all gcda files + if args.zerocounters: + removeFiles(coverage_files) + logging.info("Removed {} .gcda files".format(len(coverage_files))) + sys.exit(0) + + # Fire up one gcov per cpu and start processing gcdas + gcov_filter_options = getGcovFilterOptions(args) + fastcov_json = processGcdas(args, coverage_files, gcov_filter_options) + + # Summarize processing results + logging.info("Processed {} .gcov files ({} total, {} skipped)".format(GCOVS_TOTAL - GCOVS_SKIPPED, GCOVS_TOTAL, GCOVS_SKIPPED)) + logging.debug("Final report will contain coverage for the following %d source files:\n %s", len(fastcov_json["sources"]), "\n ".join(fastcov_json["sources"])) + + return fastcov_json + +def dumpFile(fastcov_json, args): + if args.lcov: + dumpToLcovInfo(fastcov_json, args.output) + logging.info("Created lcov info file '{}'".format(args.output)) + else: + dumpToJson(fastcov_json, args.output) + logging.info("Created fastcov json file '{}'".format(args.output)) + +def tupleToDotted(tup): + return ".".join(map(str, tup)) + +def parseArgs(): + parser = argparse.ArgumentParser(description='A parallel gcov wrapper for fast coverage report generation') + parser.add_argument('-z', '--zerocounters', dest='zerocounters', action="store_true", help='Recursively delete all gcda files') + + # Enable Branch Coverage + parser.add_argument('-b', '--branch-coverage', dest='branchcoverage', action="store_true", help='Include only the most useful branches in the coverage report.') + parser.add_argument('-B', '--exceptional-branch-coverage', dest='xbranchcoverage', action="store_true", help='Include ALL branches in the coverage report (including potentially noisy exceptional branches).') + parser.add_argument('-A', '--exclude-br-lines-starting-with', dest='exclude_branches_sw', nargs="+", metavar='', default=[], help='Exclude branches from lines starting with one of the provided strings (i.e. assert, return, etc.)') + parser.add_argument('-a', '--include-br-lines-starting-with', dest='include_branches_sw', nargs="+", metavar='', default=[], help='Include only branches from lines starting with one of the provided strings (i.e. 
if, else, while, etc.)') + parser.add_argument('-X', '--skip-exclusion-markers', dest='skip_exclusion_markers', action="store_true", help='Skip reading source files to search for lcov exclusion markers (such as "LCOV_EXCL_LINE")') + parser.add_argument('-x', '--scan-exclusion-markers', dest='scan_exclusion_markers', action="store_true", help='(Combine operations) Force reading source files to search for lcov exclusion markers (such as "LCOV_EXCL_LINE")') + + # Capture untested file coverage as well via gcno + parser.add_argument('-n', '--process-gcno', dest='use_gcno', action="store_true", help='Process both gcno and gcda coverage files. This option is useful for capturing untested files in the coverage report.') + + # Filtering Options + parser.add_argument('-s', '--source-files', dest='sources', nargs="+", metavar='', default=[], help='Filter: Specify exactly which source files should be included in the final report. Paths must be either absolute or relative to current directory.') + parser.add_argument('-e', '--exclude', dest='excludepost', nargs="+", metavar='', default=[], help='Filter: Exclude source files from final report if they contain one of the provided substrings (i.e. /usr/include test/, etc.)') + parser.add_argument('-i', '--include', dest='includepost', nargs="+", metavar='', default=[], help='Filter: Only include source files in final report that contain one of the provided substrings (i.e. src/ etc.)') + parser.add_argument('-f', '--gcda-files', dest='coverage_files', nargs="+", metavar='', default=[], help='Filter: Specify exactly which gcda or gcno files should be processed. Note that specifying gcno causes both gcno and gcda to be processed.') + parser.add_argument('-E', '--exclude-gcda', dest='excludepre', nargs="+", metavar='', default=[], help='Filter: Exclude gcda or gcno files from being processed via simple find matching (not regex)') + + parser.add_argument('-g', '--gcov', dest='gcov', default='gcov', help='Which gcov binary to use') + + parser.add_argument('-d', '--search-directory', dest='directory', default=".", help='Base directory to recursively search for gcda files (default: .)') + parser.add_argument('-c', '--compiler-directory', dest='cdirectory', default=".", help='Base directory compiler was invoked from (default: .) \ + This needs to be set if invoking fastcov from somewhere other than the base compiler directory.') + + parser.add_argument('-j', '--jobs', dest='jobs', type=int, default=multiprocessing.cpu_count(), help='Number of parallel gcov to spawn (default: {}).'.format(multiprocessing.cpu_count())) + parser.add_argument('-m', '--minimum-chunk-size', dest='minimum_chunk', type=int, default=5, help='Minimum number of files a thread should process (default: 5). \ + If you have only 4 gcda files but they are monstrously huge, you could change this value to a 1 so that each thread will only process 1 gcda. Otherwise fastcov will spawn only 1 thread to process all of them.') + + parser.add_argument('-F', '--fallback-encodings', dest='fallback_encodings', nargs="+", metavar='', default=[], help='List of encodings to try if opening a source file with the default fails (i.e. latin1, etc.). 
This option is not usually needed.') + + parser.add_argument('-l', '--lcov', dest='lcov', action="store_true", help='Output in lcov info format instead of fastcov json') + parser.add_argument('-o', '--output', dest='output', default="coverage.json", help='Name of output file (default: coverage.json)') + parser.add_argument('-q', '--quiet', dest='quiet', action="store_true", help='Suppress output to stdout') + + parser.add_argument('-t', '--test-name', dest='test_name', default="", help='Specify a test name for the coverage. Equivalent to lcov\'s `-t`.') + parser.add_argument('-C', '--add-tracefile', dest='combine', nargs="+", help='Combine multiple coverage files into one. If this flag is specified, fastcov will do a combine operation instead invoking gcov. Equivalent to lcov\'s `-a`.') + + parser.add_argument('-V', '--verbose', dest="verbose", action="store_true", help="Print more detailed information about what fastcov is doing") + parser.add_argument('-v', '--version', action="version", version='%(prog)s {version}'.format(version=__version__), help="Show program's version number and exit") + + args = parser.parse_args() + + return args + +def checkPythonVersion(version): + """Exit if the provided python version is less than the supported version""" + if version < MINIMUM_PYTHON: + sys.stderr.write("Minimum python version {} required, found {}\n".format(tupleToDotted(MINIMUM_PYTHON), tupleToDotted(version))) + sys.exit(1) + +def checkGcovVersion(version): + """Exit if the provided gcov version is less than the supported version""" + if version < MINIMUM_GCOV: + sys.stderr.write("Minimum gcov version {} required, found {}\n".format(tupleToDotted(MINIMUM_GCOV), tupleToDotted(version))) + sys.exit(2) + +def setupLogging(quiet, verbose): + handler = logging.StreamHandler() + handler.setFormatter(FastcovFormatter("[%(levelname)s]: %(message)s")) + + root = logging.getLogger() + root.setLevel(logging.INFO) + root.addHandler(handler) + + if not quiet: + logging.disable(level=logging.NOTSET) # Re-enable logging + + if verbose: + root.setLevel(logging.DEBUG) + +def main(): + args = parseArgs() + + # Setup logging + setupLogging(args.quiet, args.verbose) + + # Get report from appropriate source + if args.combine: + fastcov_json = getCombineCoverage(args) + skip_exclusion_markers = not args.scan_exclusion_markers + else: + fastcov_json = getGcovCoverage(args) + skip_exclusion_markers = args.skip_exclusion_markers + + # Scan for exclusion markers + if not skip_exclusion_markers: + scanExclusionMarkers(fastcov_json, args.jobs, args.exclude_branches_sw, args.include_branches_sw, args.minimum_chunk, args.fallback_encodings) + logging.info("Scanned {} source files for exclusion markers".format(len(fastcov_json["sources"]))) + + # Dump to desired file format + dumpFile(fastcov_json, args) + +# Set package version... it's way down here so that we can call tupleToDotted +__version__ = tupleToDotted(FASTCOV_VERSION) + +if __name__ == '__main__': + main()
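
For reference, the fastcov.py invocations the reworked code_coverage stages
issue can be reproduced by hand, roughly as below. This is a sketch only:
<cc_dir> stands for the script's $cc_dir output directory, fastcov.py is
assumed to be on PATH, and the stage-12 --include/--exclude values are
placeholders for the lists the script builds from @Source_Globs and
@Remove_Sources (defined elsewhere in code_coverage).

    $ # Stage 5: capture every .gcda into a single lcov tracefile,
    $ # deferring the exclusion-marker scan (-X) until filtering
    $ nodist/fastcov.py -b -c src/obj_dbg -X --lcov \
          --exclude /usr --exclude test_regress -o <cc_dir>/app_total.info
    $
    $ # Stage 12: combine/filter the tracefile, this time rescanning the
    $ # cloned sources for LCOV_EXCL markers (-x)
    $ cd <cc_dir>
    $ fastcov.py -C app_total.info --include src/V3Ast.cpp --exclude /usr \
          -x --lcov -o app_total_f.info
    $
    $ # Stage 17: render the HTML report
    $ genhtml app_total_f.info --demangle-cpp --rc lcov_branch_coverage=1 \
          --rc genhtml_hi_limit=100 --output-directory html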