#!/usr/bin/perl -w # # Lintian HTML reporting tool -- Create Lintian web reports # # Copyright (C) 1998 Christian Schwarz and Richard Braakman # Copyright (C) 2007 Russ Allbery # # This program is free software. It is distributed under the terms of # the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any # later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, you can find it on the World Wide # Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, # MA 02110-1301, USA. use strict; use File::Copy qw(copy); use URI::Escape; use Text::Template (); # ------------------------------ # Global variables and configuration # These have no default and must be set in the configuration file. # FIXME: $statistics_file should be in all caps as well. our ($LINTIAN_ROOT, $LINTIAN_LAB, $LINTIAN_ARCHIVEDIR, $LINTIAN_DIST, $LINTIAN_ARCH, $HTML_TMP_DIR, $statistics_file, $LINTIAN_AREA, $HISTORY, $HISTORY_DIR, $LINTIAN_SOURCE); # Read the configuration. require './config'; # The path to the mirror timestamp. our $LINTIAN_TIMESTAMP = "$LINTIAN_ARCHIVEDIR/project/trace/ftp-master.debian.org"; # FIXME: At least the lab should be a parameter to Read_pkglists rather # than an environment variable. $ENV{'LINTIAN_LAB'} = $LINTIAN_LAB; $ENV{'LINTIAN_ROOT'} = $LINTIAN_ROOT; # Import Lintian Perl libraries. use lib "$ENV{LINTIAN_ROOT}/lib"; use Lintian::Lab; use Lintian::Profile; use Lintian::Internal::FrontendUtil; use Text_utils; use Util; my $profile = Lintian::Profile->new (find_default_profile ("$LINTIAN_ROOT/profiles"), $LINTIAN_ROOT, ["$LINTIAN_ROOT/profiles"]); # Set the Lintian version, current timestamp, and archive timestamp. our $LINTIAN_VERSION = `$LINTIAN_ROOT/frontend/lintian --print-version`; our $timestamp = `date -u --rfc-822`; our $mirror_timestamp = slurp_entire_file($LINTIAN_TIMESTAMP); chomp ($LINTIAN_VERSION, $timestamp); $mirror_timestamp =~ s/\n.*//s; # ------------------------------ # Initialize templates # The path to our templates. our $TEMPLATES = "$LINTIAN_ROOT/reporting/templates"; # This only has to be done once, so do it at the start and then reuse the same # templates throughout. our %templates; for my $template (qw/head foot clean index maintainer maintainers packages tag tags tags-severity tag-not-seen tags-all/) { my %options = (TYPE => 'FILE', SOURCE => "$TEMPLATES/$template.tmpl"); $templates{$template} = Text::Template->new (%options) or die "cannot load template $template: $Text::Template::ERROR\n"; } # ------------------------------ # Main routine my $LAB = Lintian::Lab->new ($LINTIAN_LAB); my $source_info; $LAB->open; $source_info = $LAB->_get_lab_index ('source'); # Create output directories. mkdir($HTML_TMP_DIR, 0777) or die "cannot create output directory $HTML_TMP_DIR: $!\n"; mkdir("$HTML_TMP_DIR/full", 0777) or die "cannot create output directory $HTML_TMP_DIR/full: $!\n"; mkdir("$HTML_TMP_DIR/maintainer", 0777) or die "cannot create output directory $HTML_TMP_DIR/maintainer: $!\n"; mkdir("$HTML_TMP_DIR/tags", 0777) or die "cannot create output directory $HTML_TMP_DIR/tags: $!\n"; symlink('.', "$HTML_TMP_DIR/reports") or die "cannot create symlink $HTML_TMP_DIR/reports: $!\n"; symlink("$LINTIAN_ROOT/doc/lintian.html", "$HTML_TMP_DIR/manual") or die "cannot create symlink $HTML_TMP_DIR/manual: $!\n"; if ($ARGV[0]) { symlink($ARGV[0], "$HTML_TMP_DIR/lintian.log") or die "cannot create symlink $HTML_TMP_DIR/lintian.log: $!\n"; system("gzip --best -c > \"$HTML_TMP_DIR/lintian.log.gz\" < \"$ARGV[0]\"") == 0 or die "cannot create $HTML_TMP_DIR/lintian.log.gz.\n"; } copy("$LINTIAN_ROOT/reporting/lintian.css", "$HTML_TMP_DIR/lintian.css") or die "cannot copy lintian.css to $HTML_TMP_DIR: $!\n"; for my $image (qw/ico.png l.png logo-small.png/) { copy("$LINTIAN_ROOT/reporting/images/$image", "$HTML_TMP_DIR/$image") or die "cannot copy images/$image to $HTML_TMP_DIR: $!\n"; } # %statistics accumulates global statistics. For tags: errors, warnings, # experimental, overridden, and info are the keys holding the count of tags of # that sort. For packages: binary, udeb, and source are the number of # packages of each type with Lintian errors or warnings. For maintainers: # maintainers is the number of maintainers with Lintian errors or warnings. # # %tag_statistics holds a hash of tag-specific statistics. Each tag name is a # key, and its value is a hash with the following keys: count and overrides # (number of times the tag has been detected and overriden, respectively), and # packages (number of packages with at least one such tag). my (%statistics, %tag_statistics); # %by_maint holds a hash of maintainer names to packages and tags. Each # maintainer is a key. The value is a hash of package names to hashes. Each # package hash is in turn a hash of versions to an anonymous array of hashes, # with each hash having keys code, package, type, tag, severity, certainty, # extra, and xref. xref gets the partial URL of the maintainer page for that # source package. # # In other words, the lintian output line: # # W: gnubg source: substvar-source-version-is-deprecated gnubg-data # # for gnubg 0.15~20061120-1 maintained by Russ Allbery is # turned into the following structure: # # { 'gnubg' => { # '0.15~20061120-1' => [ # { code => 'W', # package => 'gnubg', # version => '0.15~20061120-1', # area => 'main', # type => 'source', # tag => 'substvar-source-version-is-deprecated', # severity => 'normal', # certainty => 'certain', # extra => 'gnubg-data' # xref => 'rra@debian.org.html#gnubg' } ] } } # # and then stored under the key 'Russ Allbery ' # # %by_uploader holds the same thing except for packages for which the person # is only an uploader. # # %by_tag is a hash of tag names to an anonymous array of tag information # hashes just like the inside-most data structure above. my (%by_maint, %by_uploader, %by_tag); # We take a lintian log file on either standard input or as the first # argument. This log file contains all the tags lintian found, plus N: tags # with informational messages. Ignore all the N: tags and load everything # else into the hashes we use for all web page generation. # # We keep track of a hash from maintainer page URLs to maintainer values so # that we don't have two maintainers who map to the same page and overwrite # each other's pages. If we find two maintainers who map to the same URL, # just assume that the second maintainer is the same as the first (but warn # about it). my (%seen, %saw_maintainer); # Matches something like: (1:2.0-3) [arch1 arch2] # - captures the version and the architectures my $verarchre = qr,(?: \s* \(( [^)]++ )\) \s* \[ ( [^]]++ ) \]),xo; # ^^^^^^^^ ^^^^^^^^^^^^ # ( version ) [architecture ] # matches the full deal: # 1 222 3333 4444444 5555 666 777 # - T: pkg type (version) [arch]: tag [...] # ^^^^^^^^^^^^^^^^^^^^^ # Where the marked part(s) are optional values. The numbers above the example # are the capture groups. my $fullre = qr/([EWIXOP]): (\S+)(?: (\S+)(?:$verarchre)?)?: (\S+)(?:\s+(.*))?/o; while (<>) { chomp; next unless m/^$fullre/o; my ($code, $package, $type, $pver, $parch, $tag, $extra) = ($1, $2, $3, $4, $5, $6, $7); $type = 'binary' unless (defined $type); $extra = '' unless (defined $extra); next unless ($type eq 'source' || $type eq 'binary' || $type eq 'udeb'); # Ignore unknown tags - happens if we removed a tag that is still present # in the log file. next unless $profile->get_tag ($tag, 1); # Update statistics. my $key = { E => 'errors', W => 'warnings', I => 'info', X => 'experimental', O => 'overridden', P => 'pedantic', }->{$code}; $statistics{$key}++; unless ($seen{"$package $type"}) { $statistics{"$type-packages"}++; $seen{"$package $type"} = 1; } # Determine the source package for this package and warn if there appears # to be no source package in the archive. Determine the maintainer, # version, and archive area. Work around a missing source package by # pulling information from a binary package or udeb of the same name if # there is any. my ($source, $version, $area, $source_version, $maintainer, $uploaders); my $pkg_data = $LAB->_get_lab_manifest_data ($package, $type, $pver, $parch); if ($type eq 'source') { $source = $package; if (defined $pkg_data) { $source_version = $version = $pkg_data->{version}; $area = $pkg_data->{area}; $maintainer = $pkg_data->{maintainer}; $uploaders = $pkg_data->{uploaders}; } else { warn "source package $package not listed!\n"; $source_version = $version = $pver; } } else { my $src_data; if (defined $pkg_data) { $version = $pkg_data->{version}; $area = $pkg_data->{area}; $source = $pkg_data->{source}//$package; $source_version = $pkg_data->{'source-version'}//$version; $src_data = $LAB->_get_lab_manifest_data ($source, 'source', $source_version); } else { $version = $pver; } if (defined $src_data) { $maintainer = $src_data->{maintainer}; $uploaders = $src_data->{uploaders}; } else { warn "source for package $package not found!\n"; $maintainer = undef; $uploaders = undef; } } $maintainer ||= '(unknown)'; $area ||= 'main'; $source ||= ''; $version = 'unknown' unless (defined ($version) and length ($version) > 0); $source_version = $version unless (defined ($source_version) and length ($source_version) > 0); # Check if we've seen the URL for this maintainer before and, if so, map # them to the same person as the previous one. $maintainer = map_maintainer ($maintainer); $saw_maintainer{$maintainer} = 1; # Update maintainer statistics. $statistics{maintainers}++ unless defined $by_maint{$maintainer}; # Sanitize, just out of paranoia. $source =~ s/[^a-zA-Z0-9.+-]/_/g; $version =~ s/[^a-zA-Z0-9.+:~-]/_/g; # Add the tag information to our hashes. Share the data between the # hashes to save space (which means we can't later do destructive tricks # with it). my $info = { code => html_quote ($code), package => html_quote ($package), version => html_quote ($version), area => html_quote ($area), type => html_quote ($type), tag => html_quote ($tag), severity => html_quote ($profile->get_tag ($tag, 1)->severity), certainty => html_quote ($profile->get_tag ($tag, 1)->certainty), extra => html_quote ($extra), xref => maintainer_url ($maintainer) . "#$source" }; $by_maint{$maintainer}{$source}{$source_version} ||= []; push(@{ $by_maint{$maintainer}{$source}{$source_version} }, $info); $by_tag{$tag} ||= []; push(@{ $by_tag{$tag} }, $info); # If the package had uploaders listed, also add the information to # %by_uploaders (still sharing the data between hashes). if ($uploaders) { my @uploaders = split (/>\K\s*,\s*/, $uploaders); for (@uploaders) { my $uploader = map_maintainer ($_); next if $uploader eq $maintainer; $saw_maintainer{$uploader} = 1; $by_uploader{$uploader}{$source}{$source_version} ||= []; push(@{ $by_uploader{$uploader}{$source}{$source_version} }, $info); } } } # Build a hash of all maintainers, not just those with Lintian tags. We use # this later to generate stub pages for maintainers whose packages are all # Lintian-clean. my %clean; $source_info->visit_all (sub { my ($srcdata) = @_; my $maintainer = $srcdata->{maintainer}; my $id = maintainer_url ($maintainer); $clean{$id} = $maintainer; }); # Done with the lab and its metadata $LAB->close; undef $LAB; undef $source_info; # Now, walk through the tags by source package (sorted by maintainer). Output # a summary page of errors and warnings for each maintainer, output a full # page that includes info, experimental, and overriden tags, and assemble the # maintainer index and the QA package list as we go. my (%qa, %maintainers, %sources); my @maintainers; { my %unique; @maintainers = sort grep { !$unique{$_}++ } keys (%by_maint), keys (%by_uploader); } for my $maintainer (@maintainers) { my $id = maintainer_url ($maintainer); delete $clean{$id}; # For each of this maintainer's packages, add statistical information # about the number of each type of tag to the QA data and build the # packages hash used for the package index. We only do this for the # maintainer packages, not the uploader packages, to avoid # double-counting. for my $source (keys %{ $by_maint{$maintainer} }) { my %count; for my $version (keys %{ $by_maint{$maintainer}{$source} }) { my $tags = $by_maint{$maintainer}{$source}{$version}; for my $tag (@$tags) { $count{$tag->{code}}++; } if (@$tags) { $sources{$source} = $tags->[0]->{xref}; } } $qa{$source} = \%count; } # Determine if the maintainer's page is clean. Check all packages for # which they're either maintainer or uploader and set $error_clean if # they have no errors or warnings. # # Also take this opportunity to sort the tags so that all similar tags # will be grouped, which produces better HTML output. my $error_clean = 1; for my $source (keys %{ $by_maint{$maintainer} }, keys %{ $by_uploader{$maintainer} }) { my $versions = $by_maint{$maintainer}{$source} || $by_uploader{$maintainer}{$source}; for my $version (keys %$versions) { $versions->{$version} = [ sort by_tag @{ $versions->{$version} } ]; my $tags = $versions->{$version}; for my $tag (@$tags) { $error_clean = 0 if ($tag->{code} eq 'E'); $error_clean = 0 if ($tag->{code} eq 'W'); } } } # Determine the parts of the maintainer and the file name for the # maintainer page. my ($name, $email) = ($maintainer =~ /^(.*) <([^>]+)>/); $name = 'Unknown Maintainer' unless $name; $email = 'unknown' unless $email; my $regular = "maintainer/$id"; my $full = "full/$id"; # Create the regular maintainer page (only errors and warnings) and the # full maintainer page (all tags, including overrides and info tags). print "Generating page for $id\n"; my %data = ( email => html_quote (uri_escape ($email)), errors => 1, id => $id, maintainer => html_quote ($maintainer), name => html_quote ($name), packages => $by_maint{$maintainer}, uploads => $by_uploader{$maintainer}, ); my $template; if ($error_clean) { $template = $templates{clean}; } else { $template = $templates{maintainer}; } output_template ($regular, $template, \%data); $template = $templates{maintainer}; $data{errors} = 0; output_template ($full, $template, \%data); # Add this maintainer to the hash of maintainer to URL mappings. $maintainers{$maintainer} = $id; } # Write out the maintainer index. my %data = ( maintainers => \%maintainers, ); output_template ('maintainers.html', $templates{maintainers}, \%data); # Write out the QA package list. This is a space-delimited file that contains # the package name and then the error count, warning count, info count, # pedantic count, experimental count, and overridden tag count. open (QA, '>', "$HTML_TMP_DIR/qa-list.txt") or die "cannot create qa-list.txt: $!\n"; for my $source (sort keys %qa) { print QA $source; for my $code (qw/E W I P X O/) { my $count = $qa{$source}{$code} || 0; print QA " $count"; } print QA "\n"; } close QA or die "cannot write to qa-list: $!\n"; # Now, generate stub pages for every maintainer who has only clean packages. for my $id (keys %clean) { my $maintainer = $clean{$id}; my ($name, $email) = ($maintainer =~ /^(.*) <([^>]+)>/); $email = 'unknown' unless $email; my %maint_data = ( id => $id, email => html_quote (uri_escape ($email)), maintainer => html_quote ($maintainer), name => html_quote ($name), clean => 1, ); print "Generating clean page for $id\n"; output_template ("maintainer/$id", $templates{clean}, \%maint_data); output_template ("full/$id", $templates{clean}, \%maint_data); } # Create the pages for each tag. Each page shows the extended description for # the tag and all the packages for which that tag was issued. for my $tag (sort $profile->tags (1)) { my $info = $profile->get_tag ($tag, 1); my $description = $info->description('html', ' '); my ($count, $overrides) = (0, 0); my %seen_tags; my $tmpl = 'tag-not-seen'; if (exists $by_tag{$tag}) { $tmpl = 'tag'; foreach (@{$by_tag{$tag}}) { if ($_->{code} ne 'O') { $count++; $seen_tags{$_->{xref}}++; } else { $overrides++; } } $tag_statistics{$tag}{'count'} = $count; $tag_statistics{$tag}{'overrides'} = $overrides; $tag_statistics{$tag}{'packages'} = scalar keys %seen_tags; } my %maint_data = ( description => $description, tag => html_quote ($tag), code => $info->code, tags => $by_tag{$tag}, ); output_template ("tags/$tag.html", $templates{$tmpl}, \%maint_data); } # Create the general tag indices. %data = ( tags => \%by_tag, stats => \%tag_statistics, profile => \$profile, ); output_template ('tags.html', $templates{tags}, \%data); output_template ('tags-severity.html', $templates{'tags-severity'}, \%data); output_template ('tags-all.html', $templates{'tags-all'}, \%data); # Generate the package lists. These are huge, so we break them into four # separate pages. # # FIXME: Does anyone actually use these pages? They're basically unreadable. my %list; $list{'0-9, A-F'} = []; $list{'G-L'} = []; $list{'M-R'} = []; $list{'S-Z'} = []; for my $package (sort keys %sources) { my $first = uc substr($package, 0, 1); if ($first le 'F') { push(@{ $list{'0-9, A-F'} }, $package) } elsif ($first le 'L') { push(@{ $list{'G-L'} }, $package) } elsif ($first le 'R') { push(@{ $list{'M-R'} }, $package) } else { push(@{ $list{'S-Z'} }, $package) } } %data = ( sources => \%sources, ); my $i = 1; for my $section (sort keys %list) { $data{section} = $section; $data{list} = $list{$section}; output_template ("packages_$i.html", $templates{packages}, \%data); $i++; } # Finally, we can start creating the index page. First, read in the old # statistics file so that we can calculate deltas for all of our statistics. my $old_statistics; if (-f $statistics_file) { ($old_statistics) = read_dpkg_control($statistics_file); } my %delta; my @attrs = qw(maintainers source-packages binary-packages udeb-packages errors warnings info experimental pedantic overridden); for my $attr (@attrs) { my $old = $old_statistics->{$attr} || 0; $statistics{$attr} ||= 0; $delta{$attr} = sprintf('%d (%+d)', $statistics{$attr}, $statistics{$attr} - $old); } # Update the statistics file. open (STATS, '>', $statistics_file) or die "cannot open $statistics_file for writing: $!\n"; print STATS "last-updated: $timestamp\n"; print STATS "mirror-timestamp: $mirror_timestamp\n"; for my $attr (@attrs) { print STATS "$attr: $statistics{$attr}\n"; } print STATS "lintian-version: $LINTIAN_VERSION\n"; close STATS or die "cannot write to $statistics_file: $!\n"; # Create the main page. %data = ( architecture => $LINTIAN_ARCH, delta => \%delta, dist => $LINTIAN_DIST, mirror => $mirror_timestamp, previous => $old_statistics->{'last-updated'}, area => join(', ', split(/\s*,\s*/, $LINTIAN_AREA)), ); output_template ('index.html', $templates{index}, \%data); exit 0 if (not $HISTORY); # Update history. my $unix_time = time(); mkdir("$HISTORY_DIR") or die "cannot create history directory $HISTORY_DIR: $!\n" if (not -d "$HISTORY_DIR"); mkdir("$HISTORY_DIR/tags") or die "cannot create tag history directory $HISTORY_DIR/tags: $!\n" if (not -d "$HISTORY_DIR/tags"); my $history_file = "$HISTORY_DIR/statistics.dat"; my $stats = ''; for my $attr (@attrs) { $stats .= " $statistics{$attr}"; } open(HIST, '>>', $history_file) or die "cannot open history file: $!\n"; print HIST "$unix_time $LINTIAN_VERSION$stats\n"; close HIST or die "cannot write to $history_file: $!\n"; for my $tag (sort keys %tag_statistics) { $history_file = "$HISTORY_DIR/tags/$tag.dat"; $stats = $tag_statistics{$tag}; open(HIST, '>>', $history_file) or die "cannot open tag history file $history_file: $!\n"; print HIST "$unix_time $stats->{'count'} $stats->{'overrides'} " . "$stats->{'packages'}\n"; close HIST or die "cannot write to $history_file: $!\n"; } exit 0; # ------------------------------ # Utility functions # Determine the file name for the maintainer page given a maintainer. It # should be .html where is their email address with all # characters other than a-z A-Z 0-9 - _ . @ = + replaced with _. Don't change # this without coordinating with QA. sub maintainer_url { my ($maintainer) = @_; my ($email) = ($maintainer =~ /<([^>]+)>/); my ($regular, $full); if ($email) { my $id = $email; $id =~ tr/a-zA-Z0-9_.@=+-/_/c; return "$id.html"; } else { return 'unsorted.html'; } } # Deduplicate maintainers. Maintains a cache of the maintainers we've seen # with a given e-mail address, issues a warning if two maintainers have the # same e-mail address, and returns the maintainer string that we should use # (which is whatever maintainer we saw first with that e-mail). { my (%urlmap, %warned); sub map_maintainer { my ($maintainer) = @_; my $url = maintainer_url ($maintainer); if ($urlmap{$url} && $urlmap{$url} ne $maintainer) { warn "$maintainer has the same page as $urlmap{$url}\n" unless ($warned{$maintainer} || lc ($maintainer) eq lc ($urlmap{$url}) || $maintainer =~ /\@lists\.(alioth\.)?debian\.org>/); $warned{$maintainer}++; $maintainer = $urlmap{$url}; } else { $urlmap{$url} = $maintainer; } return $maintainer; } } # Quote special characters for HTML output. sub html_quote { my ($text) = @_; $text ||= ''; $text =~ s/&/\&/g; $text =~ s//\>/g; return $text; } # Given a file name, a template, and a data hash, fill out the template with # that data hash and output the results to the file. sub output_template { my ($file, $template, $data) = @_; $data->{version} ||= $LINTIAN_VERSION; $data->{timestamp} ||= $timestamp; $data->{head} ||= sub { $templates{head}->fill_in (HASH => { page_title => $_[0], path_prefix => '../' x ($_[1]||0), %$data }) }; $data->{foot} ||= sub { $templates{foot}->fill_in (HASH => { LINTIAN_SOURCE => $LINTIAN_SOURCE, %$data }) }; open (OUTPUT, '>', "$HTML_TMP_DIR/$file") or die "creating $HTML_TMP_DIR/$file falied: $!\n"; $template->fill_in (OUTPUT => \*OUTPUT, HASH => $data) or die "filling out $file failed: $Text::Template::ERROR\n"; close OUTPUT; } # Sort function for sorting lists of tags. Sort by package, version, area, # type, tag, and then any extra data. This will produce the best HTML output. # # Note that source tags must come before all other tags, hench the "unfair" # priority for those. This is because the first tags listed are assumed to # be source package tags. sub by_tag { if ($a->{type} ne $b->{type}) { return -1 if $a->{type} eq 'source'; return 1 if $b->{type} eq 'source'; } return $a->{package} cmp $b->{package} || $a->{version} cmp $b->{version} || $a->{area} cmp $b->{area} || $a->{type} cmp $b->{type} || $a->{tag} cmp $b->{tag} || $a->{extra} cmp $b->{extra}; } # Local Variables: # indent-tabs-mode: nil # cperl-indent-level: 4 # End: # vim: syntax=perl sw=4 sts=4 sr et