#!/usr/bin/perl
#
# Lintian reporting harness -- Create and maintain Lintian reports automatically
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
#
# This program is free software.  It is distributed under the terms of
# the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

use strict;
use Getopt::Long;


sub usage {
  print <<END;
Lintian reporting harness
Create and maintain Lintian reports automatically

Usage: harness [ -i | -f | -r | -c ]

Options:
  -c         clean mode, erase everything and start from scratch (implies -f)
  -f         full mode, blithely overwrite lintian.log
  -i         incremental mode, use old lintian.log data, process changes only
  -r         generate HTML reports only
  --dry-run  pretend to do the actions without actually doing them.  The
             "normal" harness output will go to stdout rather than the
             harness.log.

Incremental mode is the default if you have a lintian.log;
otherwise, it's full.

Report bugs to <lintian-maint\@debian.org>.
END
#'# for cperl-mode
  exit;
}

my %opt = ();

my %opthash = (
    'i' => \$opt{'incremental-mode'},
    'c' => \$opt{'clean-mode'},
    'f' => \$opt{'full-mode'},
    'r' => \$opt{'reports-only'},
    'dry-run' => \$opt{'dry-run'},
    'help|h' => \&usage,
);

# init commandline parser
Getopt::Long::config('bundling', 'no_getopt_compat', 'no_auto_abbrev');

# process commandline options
GetOptions(%opthash)
    or die("error parsing options\n");

# clean implies full - do this as early as possible, so we can just
# check $opt{'full-mode'} rather than a full
#   ($opt{'clean-mode'} || $opt{'full-mode'})
$opt{'full-mode'} = 1 if $opt{'clean-mode'};

die "Cannot use both incremental and full/clean.\n" if $opt{'incremental-mode'} && $opt{'full-mode'};
die "Cannot use other modes with reports only.\n"
    if $opt{'reports-only'} && ($opt{'full-mode'} || $opt{'incremental-mode'});

# read configuration
require './config';
use vars qw($LINTIAN_ROOT $LINTIAN_LAB $LINTIAN_ARCHIVEDIR $LINTIAN_DIST
            $LINTIAN_ARCH $LINTIAN_CFG
            $lintian_cmd $html_reports_cmd
            $log_file $lintian_log $old_lintian_log
            $changes_file $list_file $html_reports_log
            $LOG_DIR $statistics_file
            $HTML_DIR $HTML_TMP_DIR $LINTIAN_BIN_DIR $LINTIAN_GPG_CHECK
            $LINTIAN_AREA);

# export Lintian's configuration
$ENV{'LINTIAN_ROOT'} = $LINTIAN_ROOT;
$ENV{'LINTIAN_CFG'} = $LINTIAN_CFG;
$ENV{'LINTIAN_LAB'} = $LINTIAN_LAB;
$ENV{'LINTIAN_ARCHIVEDIR'} = $LINTIAN_ARCHIVEDIR;
$ENV{'LINTIAN_DIST'} = $LINTIAN_DIST;
$ENV{'LINTIAN_AREA'} = $LINTIAN_AREA;
$ENV{'LINTIAN_ARCH'} = $LINTIAN_ARCH;

# import perl libraries
unshift @INC, "$LINTIAN_ROOT/lib";
require Util;
require Lintian::Lab;
require Lintian::Lab::Manifest;
require Lintian::Processable::Package;

# turn file buffering off
$| = 1;

unless ($opt{'dry-run'}) {
    # rotate log files
    system("savelog $log_file $changes_file $list_file $html_reports_log >/dev/null") == 0
        or die "Cannot rotate log files.\n";

    # create new log file
    open(LOG, '>', $log_file)
        or die "cannot open log file $log_file for writing: $!";
} else {
    open LOG, '>&', \*STDOUT
        or die "Cannot open log file <stdout> for writing: $!";
    Log('Running in dry-run mode');
}
# From here on we can use Log() and Die().

unless ($opt{'dry-run'}) {
    system("mkdir -p -m 775 $LINTIAN_BIN_DIR") == 0 || die "$!";

    if ($LINTIAN_GPG_CHECK) {
        foreach my $g (qw(gpg gpgv)) {
            if (-l "$LINTIAN_BIN_DIR/$g") {
                unlink "$LINTIAN_BIN_DIR/$g";
            } else {
                rename "$LINTIAN_BIN_DIR/$g", "$LINTIAN_BIN_DIR/${g}.bkp";
            }
        }
    } else {
        foreach my $g (qw(gpg gpgv)) {
            symlink '/bin/true', "$LINTIAN_BIN_DIR/$g"
                unless -f "$LINTIAN_BIN_DIR/$g";
        }
        # Passed to coll/unpacked
        $ENV{'LINTIAN_COLL_UNPACKED_SKIP_SIG'} = 1;
    }
}

$ENV{'PATH'} = $LINTIAN_BIN_DIR . ':' . $ENV{'PATH'};


my $LAB = Lintian::Lab->new ($LINTIAN_LAB);

unless ($opt{'dry-run'}) {
    # purge the old packages
    $LAB->remove if $opt{'clean-mode'};

    $LAB->create ({ 'mode' => 02775}) unless $LAB->exists;
} else {
    if (! $LAB->exists || $opt{'clean-mode'}) {
        # We either do not have a lab or we were asked to clean
        # the existing one.  We solve this by creating a temp
        # lab (which will be empty).  This means that A) the lab
        # will appear to be empty (as expected by clean-mode) and
        # B) that we do not have to do a dry-run check on every
        # "read-only" lab operation (we still have to guard write
        # operations).
        $LAB = Lintian::Lab->new;
        $LAB->create;
    }
}

if (!$opt{'reports-only'} && !$opt{'full-mode'} && !$opt{'incremental-mode'}) {
    # Nothing explicitly chosen, default to -i if the log is present,
    # otherwise -f.
    if (-f $lintian_log) {
        $opt{'incremental-mode'} = 1;
    } else {
        $opt{'full-mode'} = 1;
    }
}

unless ($opt{'reports-only'}) {
    $LAB->open;
    my @manifests = local_mirror_manifests ($LINTIAN_ARCHIVEDIR, [_trim_split ($LINTIAN_DIST)],
                                            [_trim_split ($LINTIAN_AREA)], [_trim_split ($LINTIAN_ARCH)]);
    my @diffs = $LAB->generate_diffs (@manifests);
    my %skip = ();
    my @inc;
    # Use the FullEWI output as it is less ambiguous for html_reports - it shouldn't make a difference
    # but still...
    my $cmd ="$lintian_cmd -I -E --pedantic -v --show-overrides -U changelog-file".
        " --exp-output=format=fullewi";
    # Remove old/stale packages from the lab
    foreach my $diff (@diffs) {
        my $type = $diff->type;
        Log ("Removing old or changed $type packages from the lab");
        foreach my $removed (@{ $diff->removed }, @{ $diff->changed }) {
            my ($pkg_name, $pkg_version, $pkg_arch) = @$removed;
            my $entry;
            my $sk = "$type:$pkg_name/$pkg_version";
            $sk .= "/$pkg_arch" if $pkg_arch;
            $skip{$sk} = 1; # For log-cleaning (incremental runs)
            unless ($opt{'dry-run'}) {
                $entry = $LAB->get_package ($pkg_name, $type, $pkg_version, $pkg_arch);
            }
            if ($opt{'dry-run'} || $entry) {
                my $arch = '';
                $arch = " [$pkg_arch]" if $pkg_arch;
                if ($opt{'dry-run'} || $entry->remove) {
                    Log ("Removed $type $pkg_name ($pkg_version)$arch");
                } else {
                    Log ("Removing $type $pkg_name ($pkg_version)$arch failed.");
                }
            }
        }
        Log ("Adding new and changed $type packages to the lab");
        foreach my $added (@{ $diff->added }, @{ $diff->changed }) {
            my ($pkg_name, $pkg_version, $pkg_arch) = @$added;
            my $man = $diff->nlist;
            my $me = $man->get (@$added);
            my $file = $me->{'file'};
            my $proc;
            my $entry;
            unless ($opt{'dry-run'}) {
                eval {
                    $proc = Lintian::Processable::Package->new ($type, $file);
                };
                unless ($proc) {
                    my $name = "$type:$pkg_name/$pkg_version";
                    $name .= "/$pkg_arch" if $pkg_arch;
                    # Handle newlines in the error message.
                    $@ =~ s/\n*$//; $@ =~ s/\n/ /og;
                    Log ("Skipping $name due to errors ($@)");
                    next;
                }

                $entry = $LAB->get_package ($proc);
            }
            if ($opt{'dry-run'} || $entry) {
                my $ok = 0;
                my $arch = '';
                $arch = " [$pkg_arch]" if $pkg_arch;
                if ($opt{'dry-run'}) {
                    $ok = 1;
                } else {
                    eval {
                        $entry->create;
                        $entry->update_status_file or
                            die "creating status file: $!";
                        $ok = 1;
                    };
                }
                if ($ok) {
                    my $query = "$type:$pkg_name/$pkg_version";
                    $query .= "/$pkg_arch" if $pkg_arch;
                    Log ("Added $type $pkg_name ($pkg_version)$arch");
                    push @inc, $query;
                } else {
                    Log ("Adding $type $pkg_name ($pkg_version)$arch failed: $@");
                }
            }
        }
    }

    # Flushes the changed manifest to the file system - croaks on
    # error
    # - no need to check dry-run here as nothing changed and it frees
    #   memory to do this.
    # - in the (hopefully unlikely) case that dry-run is *buggy* and
    #   the lab actually was modified, then this will at least keep
    #   the lab metadata consistent with the actual contents.
    $LAB->close;

    if ($opt{'incremental-mode'}) {
        # Extra work for the incremental run

        die "Old Lintian log file $lintian_log not found!\n" unless -f $lintian_log;

        # update lintian.log
        Log('Updating lintian.log...');
        my $nfd;
        if ($opt{'dry-run'}) {
            open $nfd, '>', '/dev/null'
                or Die ("cannot open lintian.log /dev/null for writing: $!");
        } else {
            rename $lintian_log, $old_lintian_log
                or Die ("cannot rename lintian.log to $old_lintian_log: $!");
            open $nfd, '>', $lintian_log
                or Die ("cannot open lintian.log $lintian_log for writing: $!");
        }
        open my $ofd, '<', $old_lintian_log
            or Die ("cannot open old lintian.log $old_lintian_log for reading: $!");
        my $copy_mode = 1;
        while (<$ofd>) {
            if (/^N: Processing (binary|udeb|source) package (\S+) \(version (\S+), arch (\S+)\) \.\.\./o) {
                my ($type, $pkg, $ver, $arch) = ($1,$2, $3, $4);
                my $k = "$type:$pkg/$ver";
                $k .= "/$arch" if $type ne 'source';
                $copy_mode = 1;
                $copy_mode = 0 if exists $skip{$k};
            }
            if ($copy_mode) {
                print $nfd $_;
            }
        }
        print $nfd "N: ---end-of-old-lintian-log-file---\n";
        close $nfd;
        close $ofd;
        Log ('');
        if (@inc) {
            Log ('Creating work list for lintian');
            unless ($opt{'dry-run'}) {
                open my $lfd, '>', $list_file
                    or Die ("opening $list_file: $!");
                foreach my $query (@inc) {
                    print $lfd "!query: $query\n";
                }
                close $lfd;
            }
            Log ('');

            # incremental run cmd changes
            Log ('Running Lintian over newly introduced and changed packages...');
            $cmd .= " --packages-from-file $list_file >>$lintian_log 2>&1";
        } else {
            $cmd = undef;
            Log ('Skipping Lintian run - nothing to do...');
        }
    } else {
        # full run cmd changes
        Log('Running Lintian over all packages...');
        $cmd .= " -a >$lintian_log 2>&1";
    }

    if ($cmd) {
        Log("Executing $cmd");
        unless ($opt{'dry-run'}) {
            my $res = (system($cmd) >> 8);
            (($res == 0) or ($res == 1))
                or Log("warning: executing lintian returned $res");
        }
        Log('');
    }
}

# create html reports
Log('Creating HTML reports...');
run("$html_reports_cmd $lintian_log >$html_reports_log 2>&1")
    or Log("warning: executing $html_reports_cmd returned " . (($? >> 8) & 0xff));
Log('');

# rotate the statistics file updated by $html_reports_cmd
if (!$opt{'dry-run'} && -f $statistics_file) {
    system("cp $statistics_file $LOG_DIR/stats/statistics-`date +%Y%m%d`") == 0
        or Log('warning: could not rotate the statistics file');
}

# install new html directory
Log('Installing HTML reports...');
unless ($opt{'dry-run'}) {
    system("rm -rf $HTML_DIR") == 0
        or Die("error removing $HTML_DIR");
    # a tiny bit of race right here
    rename($HTML_TMP_DIR,$HTML_DIR)
        or Die("error renaming $HTML_TMP_DIR into $HTML_DIR");
}
Log('');

# ready!!! :-)
Log('All done.');
exit 0;

# -------------------------------

sub Log {
    print LOG $_[0],"\n";
}

sub run {
    Log("Executing $_[0]");
    return 1 if $opt{'dry-run'};
    return (system($_[0]) == 0);
}

sub Die {
    Log("fatal error: $_[0]");
    exit 1;
}

sub _trim_split {
    my ($val) = @_;
    return () unless $val;
    $val =~ s/^\s++//o;
    $val =~ s/\s++$//o;
    return split m/\s*+,\s*+/o, $val;
}

# local_mirror_manifests ($mirdir, $dists, $areas, $archs)
#
# Returns a list of manifests that represents what is on the local mirror
# at $mirdir.  3 manifests will be returned, one for "source", one for "binary"
# and one for "udeb" packages.  They are populated based on the "Sources" and
# "Packages" files.
#
# $mirdir - the path to the local mirror
# $dists  - listref of dists to consider (i.e. ['unstable'])
# $areas  - listref of areas to consider (i.e. ['main', 'contrib', 'non-free'])
# $archs  - listref of archs to consider (i.e. ['i386', 'amd64'])
#
sub local_mirror_manifests {
    my ($mirdir, $dists, $areas, $archs) = @_;
    my $active_srcs = {};
    my $srcman = Lintian::Lab::Manifest->new ('source');
    my $binman = Lintian::Lab::Manifest->new ('binary');
    my $udebman = Lintian::Lab::Manifest->new ('udeb');
    foreach my $dist (@$dists) {
        foreach my $area (@$areas) {
            my $srcs = "$mirdir/dists/$dist/$area/source/Sources";
            my $srcfd;
            my $srcsub;
            # Binaries have a "per arch" file.
            # - we check those first and then include the source packages that
            #   are referred to by these binaries.
            foreach my $arch (@$archs) {
                my $pkgs = "$mirdir/dists/$dist/$area/binary-$arch/Packages";
                my $upkgs = "$mirdir/dists/$dist/$area/debian-installer/" .
                    "binary-$arch/Packages";
                my $pkgfd = _open_data_file ($pkgs);
                my $binsub = sub { _parse_pkgs_pg ($active_srcs, $binman, $mirdir, $area, @_) };
                my $upkgfd = _open_data_file ($upkgs);
                my $udebsub = sub { _parse_pkgs_pg ($active_srcs, $udebman, $mirdir, $area, @_) };
                Util::_parse_dpkg_control_iterative ($binsub, $pkgfd);
                Util::_parse_dpkg_control_iterative ($udebsub, $upkgfd);
                close $pkgfd;
                close $upkgfd;
            }
            $srcfd = _open_data_file ($srcs);
            $srcsub = sub { _parse_srcs_pg ($active_srcs, $srcman, $mirdir, $area, @_) };
            Util::_parse_dpkg_control_iterative ($srcsub, $srcfd);
            close $srcfd;
        }
    }
    return ($srcman, $binman, $udebman);
}

# _open_data_file ($file)
#
# Opens $file if it exists, otherwise it tries common extensions (i.e. .gz) and opens
# that instead.  It may pipe the file through a external decompressor, so the returned
# file descriptor cannot be assumed to be a file.
#
# If $file does not exists and no common extensions are found, this dies.  It may also
# die if it finds a file, but is unable to open it.
sub _open_data_file {
    my ($file) = @_;
    if (-e $file) {
        open my $fd, '<', $file or Die "opening $file: $!";
        return $fd;
    }
    foreach my $com (['gz', ['gzip', '-dc']] ){
        my ($ext, $cmd) = @$com;
        if ( -e "$file.$ext") {
            open my $c, '-|', @$cmd, "$file.$ext" or Die "running @$cmd $file.$ext";
            return $c;
        }
    }
    Die "Cannot find $file";
}

# Helper for local_mirror_manifests - it parses a paragraph from Packages file
sub _parse_pkgs_pg {
    my ($active_srcs, $manifest, $mirdir, $area, $data) = @_;
    my $ts = 0;
    my $s;
    unless ($data->{'source'}) {
        $data->{'source'} = $data->{'package'};
    } elsif ($data->{'source'} =~ /^([-+\.\w]+)\s+\((.+)\)$/) {
        $data->{'source'} = $1;
        $data->{'source-version'} = $2;
    } else {
        $data->{'source-version'} = $data->{'version'};
    }
    unless (defined $data->{'source-version'}) {
        $data->{'source-version'} = $data->{'version'};
    }
    $s = $data->{'source'} . '/' . $data->{'source-version'};
    $active_srcs->{$s}++;
    $data->{'file'} = $mirdir . '/' . $data->{'filename'};
    $data->{'area'} = $area;
    # $manifest strips redundant fields for us.  But for clarity and to
    # avoid "hard to debug" cases $manifest renames the fields, we explicitly
    # remove the "filename" field.
    delete $data->{'filename'};

    if (my @stat = stat $data->{'file'}) {
        $ts = $stat[9];
    }
    $data->{'timestamp'} = $ts;

    $manifest->set ($data);
}

# Helper for local_mirror_manifests - it parses a paragraph from Sources file
sub _parse_srcs_pg {
    my ($active_srcs, $manifest, $mirdir, $area, $data) = @_;
    my $ts = 0;
    my $dir = $data->{'directory'}//'';
    my $s = $data->{'package'} . '/' . $data->{'version'};
    # only include the source if it has any binaries to be checked.
    # - Otherwise we may end up checking a source with no binaries
    #   (happens if the architecture is "behind" in building)
    return unless $active_srcs->{$s};
    $dir .= '/' if $dir;
    foreach my $f (split m/\n/, $data->{'files'}) {
        $f =~ s/^\s++//o;
        next unless $f && $f =~ m/\.dsc$/;
        my (undef, undef, $file) = split m/\s++/, $f;
        # $dir should end with a slash if it is non-empty.
        $data->{'file'} = $mirdir . "/$dir" . $file;
        last;
    }
    $data->{'area'} = $area;
    # Rename a field :)
    $data->{'source'} = $data->{'package'};

    if (my @stat = stat $data->{'file'}) {
        $ts = $stat[9];
    }
    $data->{'timestamp'} = $ts;

    # $manifest strips redundant fields for us.
    $manifest->set ($data);
}

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
