blob: ed3cf553f597f69d35551f541aa4bf9046ad3ca3 [file] [log] [blame]
#!/usr/bin/env perl
#-----------------------------------------------------------------
# Quick and dirty script to summarize build information for a
# set of nightly runs.
#
# The results of the nighly regression runs are extracted from
# the GMANE mail archive. The URL for a given mail sent to the
# valgrind-developers mailing list is
#
# http://article.gmane.org/gmane.comp.debugging.valgrind.devel/<integer>
#
# The script extracts information about the regression run from a
# block of information at the beginning of the mail. That information
# was added beginning October 4, 2011. Therefore, only regression runs
# from that date or later can be analyzed.
#
# There is unfortunately no good way of figuring out the interval
# of integers in the above URL that include all nightly regression
# runs.
#
# The function get_regtest_data does all the work. It returns a hash
# whose keys are the dates at which nightly runs took place. The value
# is in turn a hash.
#
# Each such hash has the following keys:
# "builds" array of hashes
# "num_builds" int
# "num_failing_builds" int
# "num_passing_builds" int
# "num_testcase_failures" int
# "num_failing_testcases" int
# "failure_frequency" hash indexed by testcase name; value = int
#
# "builds" is an array of hashes with the following keys
# "arch" string (architecture)
# "distro" string (distribution, e.g. Fedora-15)
# "failures" array of strings (failing testcases)
# "valgrind revision" integer
# "VEX revision" integer
# "GCC version" string
# "C library" string
# "uname -mrs" string
# "Vendor version" string
#
#-----------------------------------------------------------------
use strict;
use warnings;
use LWP::Simple;
use Getopt::Long;
my $prog_name = "nightly-build-summary";
my $debug = 0;
my $keep = 0;
my $usage=<<EOF;
USAGE
$prog_name
--from=INTEGER beginning of mail interval; > 14800
[--to=INTEGER] end of mail interval; default = from + 100
[--debug] verbose mode (debugging)
[--keep] write individual emails to files (debugging)
[--dump] write results suitable for post-processing
[--readable] write results in human readable form (default)
EOF
#-----------------------------------------------------------------
# Search for a line indicating that this is an email containing
# the results of a valgrind regression run.
# Return 1, if found and 0 oherwise.
#-----------------------------------------------------------------
sub is_regtest_result {
my (@lines) = @_;
foreach my $line (@lines) {
return 1 if ($line =~ "^valgrind revision:");
}
return 0;
}
#-----------------------------------------------------------------
# Extract information from the run. Don't prep the data here. This
# is done later on.
#-----------------------------------------------------------------
sub get_raw_data {
my (@lines, $msgno) = @_;
my ($i, $n, $line, $date);
$n = scalar @lines;
my %hash = ();
# 1) Locate the section with the info about the environment of this nightly run
for ($i = $i + 1; $i < $n; ++$i) {
last if ($lines[$i] =~ /^valgrind revision:/);
}
die "no info block in message $msgno" if ($i == $n);
# 2) Read the info about the build: compiler, valgrind revision etc.
# and put it into a hash.
for ( ; $i < $n; ++$i) {
$line = $lines[$i];
last if ($line =~ /^$/); # empty line indicates end of section
my ($key, $value) = split(/:/, $line);
$value =~ s/^[ ]*//; # removing leading blanks
$hash{$key} = $value;
}
if ($debug) {
foreach my $key (keys %hash) {
my ($val) = $hash{$key};
print "regtest env: KEY = |$key| VAL = |$val|\n";
}
}
# 3) Get the date from when the build was kicked off.
for ( ; $i < $n; ++$i) {
$line = $lines[$i];
if ($line =~ /^Started at[ ]+([^ ]+)/) {
$date = $1;
print "DATE = $date\n";
last;
}
}
die "no date found in message $msgno" if ($i == $n);
# 4) Find out if the regression run failed or passed
$hash{"failures"} = [];
for ($i = $i + 1; $i < $n; ++$i) {
$line = $lines[$i];
if ($line =~ /Running regression tests/) {
return %hash if ($line =~ /done$/); # regtest succeeded; no failures
die "cannot determine regtest outcome for message $msgno"
if (! ($line =~ /failed$/));
last;
}
}
# 5) Regtest failed; locate the section with the list of failing testcases
for ($i = $i + 1; $i < $n; ++$i) {
$line = $lines[$i];
# Match for end-of-line == because line might be split.
last if ($line =~ /==$/);
}
die "cannot locate failing testcases in message $msgno" if ($i == $n);
# 6) Get list of failing testcases
for ($i = $i + 1; $i < $n; ++$i) {
$line = $lines[$i];
last if ($line =~ /^$/);
my ($testcase) = (split(/\s+/, $line))[0];
print "ADD failing testcase $testcase\n" if ($debug);
push @{$hash{"failures"}}, $testcase;
}
return ($date, %hash);
}
#-----------------------------------------------------------------
# Extract architecture; get a pretty name for the distro
#-----------------------------------------------------------------
sub prep_regtest_data {
my (%hash) = @_;
my ($val, $arch, $distro);
$val = $hash{"uname -mrs"};
die "uname -mrs info is missing" if (! defined $val);
$arch = (split(/ /, $val))[2];
$val = $hash{"Vendor version"};
die "Vendor version info is missing" if (! defined $val);
if ($val =~ /Fedora release ([0-9]+)/) {
$distro = "Fedora-$1";
} elsif ($val =~ /openSUSE ([0-9]+)\.([0-9]+)/) {
$distro = "openSUSE-$1.$2";
} elsif ($val =~ /SUSE Linux Enterprise Server 11 SP1/) {
$distro = "SLES-11-SP1";
} elsif ($val =~ /Red Hat Enterprise Linux AS release 4/) {
$distro = "RHEL-4";
} else {
$distro = "UNKNOWN";
}
# Add architecture and distribution to hash
$hash{"arch"} = $arch;
$hash{"distro"} = $distro;
return %hash;
}
#-----------------------------------------------------------------
# Precompute some summary information and record it
#-----------------------------------------------------------------
sub precompute_summary_info
{
my (%dates) = @_;
foreach my $date (sort keys %dates) {
my %failure_frequency = ();
my %nightly = %{ $dates{$date} };
my @builds = @{ $nightly{"builds"} };
$nightly{"num_builds"} = scalar (@builds);
$nightly{"num_failing_builds"} = 0;
$nightly{"num_testcase_failures"} = 0;
foreach my $build (@builds) {
my %regtest_data = %{ $build };
my @failures = @{ $regtest_data{"failures"} };
my $num_fail = scalar (@failures);
++$nightly{"num_failing_builds"} if ($num_fail != 0);
$nightly{"num_testcase_failures"} += $num_fail;
# Compute how often a testcase failed
foreach my $test ( @failures ) {
if (defined $failure_frequency{$test}) {
++$failure_frequency{$test};
} else {
$failure_frequency{$test} = 1;
}
}
}
$nightly{"num_passing_builds"} =
$nightly{"num_builds"} - $nightly{"num_failing_builds"};
$nightly{"num_failing_testcases"} = scalar (keys %failure_frequency);
$nightly{"failure_frequency"} = { %failure_frequency };
$dates{$date} = { %nightly };
}
return %dates;
}
#-----------------------------------------------------------------
# Get messages from GMANE, and build up a database of results.
#-----------------------------------------------------------------
sub get_regtest_data {
my ($from, $to) = @_;
my $url_base = "http://article.gmane.org/gmane.comp.debugging.valgrind.devel/";
my %dates = ();
my $old_date = "-1";
my @builds = ();
for (my $i = $from; $i <= $to; ++$i) {
my $url = "$url_base" . "$i";
my $page = get("$url");
if ($keep) {
open (EMAIL, ">$i");
print EMAIL $page;
close(EMAIL);
}
# Detect if the article does not exist. Happens for too large --to= values
last if ($page eq "No such file.\n");
# Split the page into lines
my @lines = split(/\n/, $page);
# Check whether it contains a regression test result
next if (! is_regtest_result(@lines));
print "message $i is a regression test result\n" if ($debug);
# Get the raw data
my ($date, %regtest_data) = get_raw_data(@lines);
%regtest_data = prep_regtest_data(%regtest_data);
if ($date ne $old_date) {
my %nightly = ();
$nightly{"builds"} = [ @builds ];
$dates{$old_date} = { %nightly } if ($old_date ne "-1");
$old_date = $date;
@builds = ();
}
push @builds, { %regtest_data };
}
my %nightly = ();
$nightly{"builds"} = [ @builds ];
$dates{$old_date} = { %nightly } if ($old_date ne "-1");
# Convenience: precompute some info we'll be interested in
%dates = precompute_summary_info( %dates );
return %dates;
}
#-----------------------------------------------------------------
# Write out the results in a form suitable for automatic post-processing
#-----------------------------------------------------------------
sub dump_results {
my (%dates) = @_;
foreach my $date (sort keys %dates) {
my %nightly = %{ $dates{$date} };
my @builds = @{ $nightly{"builds"} };
foreach my $build (@builds) {
my %regtest_data = %{ $build };
my $arch = $regtest_data{"arch"};
my $distro = $regtest_data{"distro"};
my @failures = @{ $regtest_data{"failures"} };
my $num_fail = scalar (@failures);
my $fails = join(":", sort @failures);
printf("Regrun: %s %3d %-10s %-20s %s\n",
$date, $num_fail, $arch, $distro, $fails);
}
my %failure_frequency = %{ $nightly{"failure_frequency"} };
foreach my $test (keys %failure_frequency) {
printf("Test: %s %3d %s\n",
$date, $failure_frequency{$test}, $test);
}
printf("Total: %s builds: %d %d fail %d pass tests: %d fail %d unique\n",
$date, $nightly{"num_builds"}, $nightly{"num_failing_builds"},
$nightly{"num_passing_builds"}, $nightly{"num_testcase_failures"},
$nightly{"num_failing_testcases"});
}
}
sub write_readable_results {
my (%dates) = @_;
foreach my $date (sort keys %dates) {
my %nightly = %{ $dates{$date} };
print "$date\n----------\n";
printf("%3d builds\n", $nightly{"num_builds"});
printf("%3d builds fail\n", $nightly{"num_failing_builds"});
printf("%3d builds pass\n", $nightly{"num_passing_builds"});
print "\n";
printf("%3d testcase failures (across all runs)\n",
$nightly{"num_testcase_failures"});
printf("%3d failing testcases (unique)\n",
$nightly{"num_failing_testcases"});
print "\n";
my @builds = @{ $nightly{"builds"} };
if ($nightly{"num_passing_builds"} != 0) {
print "Passing builds\n";
print "--------------\n";
foreach my $build (@builds) {
my %regtest_data = %{ $build };
my @failures = @{ $regtest_data{"failures"} };
my $num_fail = scalar (@failures);
if ($num_fail == 0) {
my $arch = $regtest_data{"arch"};
my $distro = $regtest_data{"distro"};
printf("%-8s %-15s\n", $arch, $distro);
}
print "\n";
}
print "\n";
}
if ($nightly{"num_failing_builds"} != 0) {
print "Failing builds\n";
print "--------------\n";
foreach my $build (@builds) {
my %regtest_data = %{ $build };
my @failures = @{ $regtest_data{"failures"} };
my $num_fail = scalar (@failures);
if ($num_fail != 0) {
my $arch = $regtest_data{"arch"};
my $distro = $regtest_data{"distro"};
printf("%-8s %-15s %d failures\n", $arch, $distro, $num_fail);
foreach my $test (@failures) {
print " $test\n";
}
print "\n";
}
}
print "\n";
}
print "Failing testcases and their frequency\n";
print "-------------------------------------\n";
my %failure_frequency = %{ $nightly{"failure_frequency"} };
# Sorted in decreasing frequency
foreach my $test (sort {$failure_frequency{$b} cmp $failure_frequency{$a} }
keys %failure_frequency) {
printf("%3d %s\n", $failure_frequency{$test}, $test);
}
print "\n";
}
}
sub main
{
my ($from, $to, $dump, $readable);
$from = $to = 0;
$dump = $readable = 0;
GetOptions( "from=i" => \$from,
"to=i" => \$to,
"debug" => \$debug,
"dump" => \$dump,
"keep" => \$keep,
"readable" => \$readable
) || die $usage;
# 14800 is about Oct 4, 2011 which is when we began including information
# about the environment
die $usage if ($from < 14800);
$to = $from + 100 if ($to == 0);
if ($from > $to) {
print STDERR "*** invalid [from,to] interval. Try again\n";
die $usage;
}
$readable = 1 if ($dump == 0 && $readable == 0);
print "check message interval [$from...$to]\n" if ($debug);
# Get mails from GMANE mail archive
my %dates = get_regtest_data($from, $to);
dump_results(%dates) if ($dump);
write_readable_results(%dates) if ($readable);
}
main();
exit 0;