# changelog-file -- lintian check script -*- perl -*-

# Copyright (C) 1998 Christian Schwarz
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

package Lintian::changelog_file;
use strict;
use warnings;

use Lintian::Relation::Version qw(versions_gt);
use Lintian::Tags qw(tag);
use Lintian::Check qw(check_spelling);
use Util;

use Encode qw(decode);
use Parse::DebianChangelog;

sub run {

my $pkg = shift;
my $type = shift;
my $info = shift;
my $found_html=0;
my $found_text=0;
my $native_pkg;
my $foreign_pkg;
my $ppkg = quotemeta($pkg);

my @doc_files;

my %file_info;
my %is_a_symlink;

# Modify the file_info by following symbolic links.
for my $file (@{$info->sorted_file_info}) {
    next unless $file =~ m/doc/o;

    $file_info{$file} = $info->file_info->{$file};

    if ($file_info{$file} =~ m/^(?:broken )?symbolic link to (.*)/) {
        $is_a_symlink{$file} = 1;
        # Figure out the link destination.  This algorithm is
        # not perfect but should be good enough.  (If it fails,
        # all that happens is that an evil symlink causes a bogus warning).
        my $newfile;
        my $link = $1;
        if ($link =~ m/^\//) {
            # absolute path; replace
            $newfile = $link;
        } else {
            $newfile = $file;   # relative path; base on $file
            $newfile =~ s,/[^/]+$,,; # strip final pathname component
            # strip another component for every leading ../ in $link
            while ($link =~ m,^\.\./,) {
                $newfile =~ s,/[^/]+$,,;
                $link =~ s,^\.\./,,;
            }
            # concatenate the results
            $newfile .= '/' . $link;
        }
        if (exists $info->file_info->{$newfile}) {
            $file_info{$file} = $info->file_info->{$newfile};
        }
    }
}

# Read package contents....  Capitalization errors are dealt with later.
foreach (@{$info->sorted_index}) {
    next unless length $_;
    # skip packages which have a /usr/share/doc/$pkg -> foo symlink
    if (m,usr/share/doc/$ppkg$, and defined $info->index->{$_}->{link}) {
        return 0;
    }

    # we are only interested in files or symlinks in /usr/(share/)?doc/$pkg
    if (m,usr/(?:share/)?doc/$ppkg/([^/\s]+), ) {
        my $file = $1;
        my $file1 = "usr/share/doc/$pkg/$file";

        push(@doc_files, $file);

        # Check a few things about the NEWS.Debian file.
        if ($file =~ /^NEWS.Debian(?:\.gz)?$/i) {
            if (not $file =~ /\.gz$/) {
                tag 'debian-news-file-not-compressed', $file1;
            } elsif ($file ne 'NEWS.Debian.gz') {
                tag 'wrong-name-for-debian-news-file', $file1;
            }
        }

        # Check if changelog files are compressed with gzip -9.  It's a bit of
        # an open question here what we should do with a file named ChangeLog.
        # If there's also a changelog file, it might be a duplicate, or the
        # packager may have installed NEWS as changelog intentionally.
        next unless $file =~ m/^changelog(?:\.html)?(?:\.gz)?$|changelog.Debian(?:\.gz)?$/;

        if (not $file =~ m/\.gz$/) {
            tag 'changelog-file-not-compressed', $file;
        } else {
            my $max_compressed = 0;
            if (exists $file_info{$file1} && defined $file_info{$file1}) {
                if ($file_info{$file1} =~ m/max compression/o) {
                    $max_compressed = 1;
                }
            }
            if (not $max_compressed and $file_info{$file1} =~ m/gzip compressed/) {
                unless ($is_a_symlink{$file1}) {
                    tag 'changelog-not-compressed-with-max-compression', $file;
                }
            }
        }

        if ($file =~ m/^changelog\.html(?:\.gz)?$/ ) {
            $found_html = 1;
        }
        if ($file =~ m/^changelog(?:\.gz)?$/ ) {
            $found_text = 1;
        }
    }
}

# ignore packages which don't have a /usr/share/doc/$pkg directory, since
# the copyright check will complain about this
if ($#doc_files < 0) {
    return 0;
}

# Check a NEWS.Debian file if we have one.  Save the parsed version of the
# flie for later checks against the changelog file.
my $news;
if (-f 'NEWS.Debian') {
    my $line = file_is_encoded_in_non_utf8('NEWS.Debian', $type, $pkg);
    if ($line) {
        tag 'debian-news-file-uses-obsolete-national-encoding', "at line $line"
    }
    my $changes = Parse::DebianChangelog->init( { infile => 'NEWS.Debian', quiet => 1 } );
    if (my @errors = $changes->get_parse_errors) {
        for (@errors) {
            tag 'syntax-error-in-debian-news-file', "line $_->[1]", "\"$_->[2]\"";
        }
    }

    # Some checks on the most recent entry.
    if ($changes->data and defined (($changes->data)[0])) {
        ($news) = $changes->data;
        if ($news->Distribution && $news->Distribution =~ /unreleased/i) {
            tag 'debian-news-entry-has-strange-distribution', $news->Distribution;
        }
        check_spelling('spelling-error-in-news-debian', $news->Changes,
                       undef, { $pkg => 1});
        if ($news->Changes =~ /^\s*\*\s/) {
            tag 'debian-news-entry-uses-asterisk';
        }
    }
}

if ( $found_html && !$found_text ) {
    tag 'html-changelog-without-text-version';
}

# is this a native Debian package?
my $version;
if (defined $info->field('version')) {
    $version = $info->field('version');
} else {
    # We do not know, but we assume it to be non-native
    # as that is most likely.
    $version = '0-1';
}

$native_pkg  = $info->native;
$foreign_pkg = (!$native_pkg && $version !~ m/-0\./);
# A version of 1.2.3-0.1 could be either, so in that
# case, both vars are false

if ($native_pkg) {
    my @foo;
    # native Debian package
    if (grep m/^changelog(?:\.gz)?$/,@doc_files) {
        # everything is fine
    } elsif (@foo = grep m/^changelog\.debian(?:\.gz)$/i,@doc_files) {
        tag 'wrong-name-for-changelog-of-native-package', "usr/share/doc/$pkg/$foo[0]";
    } else {
        tag 'changelog-file-missing-in-native-package';
    }
} else {
    # non-native (foreign :) Debian package

    # 1. check for upstream changelog
    my $found_upstream_text_changelog = 0;
    if (grep m/^changelog(\.html)?(?:\.gz)?$/,@doc_files) {
        $found_upstream_text_changelog = 1 unless $1;
        # everything is fine
    } else {
        # search for changelogs with wrong file name
        my $found = 0;
        for (@doc_files) {
            if (m/^change/i and not m/debian/i) {
                tag 'wrong-name-for-upstream-changelog', "usr/share/doc/$pkg/$_";
                $found = 1;
                last;
            }
        }
        if (not $found) {
            tag 'no-upstream-changelog' unless $info->is_transitional;
        }
    }

    # 2. check for Debian changelog
    if (grep m/^changelog\.Debian(?:\.gz)?$/,@doc_files) {
        # everything is fine
    } elsif (my @foo = grep m/^changelog\.debian(?:\.gz)?$/i,@doc_files) {
        tag 'wrong-name-for-debian-changelog-file', "usr/share/doc/$pkg/$foo[0]";
    } else {
        if ($foreign_pkg && $found_upstream_text_changelog) {
            tag 'debian-changelog-file-missing-or-wrong-name';
        } elsif ($foreign_pkg) {
            tag 'debian-changelog-file-missing';
        }
        # TODO: if uncertain whether foreign or native, either changelog.gz or
        # changelog.debian.gz should exists though... but no tests catches
        # this (extremely rare) border case... Keep in mind this is only
        # happening if we have a -0.x version number... So not my priority to
        # fix --Jeroen
    }
}

# Everything below involves opening and reading the changelog file, so bail
# with a warning at this point if all we have is a symlink.  Ubuntu permits
# such symlinks, so their profile will suppress this tag.
if (-l 'changelog') {
    tag 'debian-changelog-file-is-a-symlink';
    return 0;
}

# Bail at this point if the changelog file doesn't exist.  We will have
# already warned about this.
unless (-f 'changelog') {
    return 0;
}

# check that changelog is UTF-8 encoded
my $line = file_is_encoded_in_non_utf8('changelog', $type, $pkg);
if ($line) {
    tag 'debian-changelog-file-uses-obsolete-national-encoding', "at line $line"
}

my $changelog = $info->changelog;
if (my @errors = $changelog->get_parse_errors) {
    foreach (@errors) {
        tag 'syntax-error-in-debian-changelog', "line $_->[1]", "\"$_->[2]\"";
    }
}

my @entries = $changelog->data;
if (@entries) {
    my %versions;
    for my $entry (@entries) {
        if ($entry->Maintainer) {
            if ($entry->Maintainer =~ /<([^>\@]+\@[^>.]*)>/) {
                tag 'debian-changelog-file-contains-invalid-email-address', $1;
            }
        }
        $versions{$entry->Version} = 1 if defined $entry->Version;
    }

    if (@entries > 1) {
        my $first_timestamp = $entries[0]->Timestamp;
        my $second_timestamp = $entries[1]->Timestamp;

        if ($first_timestamp && $second_timestamp) {
            tag 'latest-debian-changelog-entry-without-new-date'
                unless (($first_timestamp - $second_timestamp) > 0
                        or lc($entries[0]->Distribution) eq 'unreleased');
        }

        my $first_version = $entries[0]->Version;
        my $second_version = $entries[1]->Version;
        if ($first_version and $second_version) {
            tag 'latest-debian-changelog-entry-without-new-version'
                unless versions_gt($first_version, $second_version)
                    or $entries[0]->Changes =~ /backport/i;
            tag 'latest-debian-changelog-entry-changed-to-native'
                if $native_pkg and $second_version =~ m/-/;
        }

        my $first_upstream = $first_version;
        $first_upstream =~ s/-[^-]+$//;
        my $second_upstream = $second_version;
        $second_upstream =~ s/-[^-]+$//;
        if ($first_upstream eq $second_upstream
            and $entries[0]->Changes =~ /^\s*\*\s+new\s+upstream\s+(?:\S+\s+)?release\b/im) {
            tag 'possible-new-upstream-release-without-new-version';
        }

        my $first_dist = lc $entries[0]->Distribution;
        my $second_dist = lc $entries[1]->Distribution;
        if ($first_dist eq 'unstable' and $second_dist eq 'experimental') {
            unless ($entries[0]->Changes =~ /\bto\s+unstable\b/) {
                tag 'experimental-to-unstable-without-comment';
            }
        }
    }

    # Some checks should only be done against the most recent changelog entry.
    my $entry = $entries[0];
    if (@entries == 1 and $entry->Version and $entry->Version =~ /-1$/) {
        tag 'new-package-should-close-itp-bug'
            unless @{ $entry->Closes };
    }
    my $changes = $entry->Changes || '';
    while ($changes =~ /(closes\s*(?:bug)?\#?\s?\d{6,})[^\w]/ig) {
        tag 'possible-missing-colon-in-closes', $1 if $1;
    }
    my $closes = $entry->Closes;
    for my $bug (@$closes) {
        tag 'improbable-bug-number-in-closes', $bug if ($bug < 100);
    }

    # unstable, testing, and stable shouldn't be used in Debian version
    # numbers.  unstable should get a normal version increment and testing and
    # stable should get suite-specific versions.
    #
    # NMUs get a free pass because they need to work with the version number
    # that was already there.
    my $changelog_version;
    if ($info->native) {
        $changelog_version = $entry->Version || '';
    } else {
        if ($entry->Version) {
            ($changelog_version) = (split('-', $entry->Version))[-1];
        } else {
            $changelog_version = '';
        }
    }
    unless (not $info->native and $changelog_version =~ /\./) {
        if ($info->native and $changelog_version =~ /testing|(?:un)?stable/i) {
            tag 'version-refers-to-distribution', $entry->Version;
        } elsif ($changelog_version =~ /woody|sarge|etch|lenny|squeeze/) {
            my %unreleased_dists = map { $_ => 1 } qw(unstable experimental);
            if (exists ($unreleased_dists{$entry->Distribution})) {
                tag 'version-refers-to-distribution', $entry->Version;
            }
        }
    }

    # Compare against NEWS.Debian if available.
    if ($news and $news->Version) {
        if ($entry->Version eq $news->Version) {
            for my $field (qw/Distribution Urgency/) {
                if ($entry->$field ne $news->$field) {
                    tag 'changelog-news-debian-mismatch', lc ($field),
                        $entry->$field . ' != ' . $news->$field;
                }
            }
        }
        unless ($versions{$news->Version}) {
            tag 'debian-news-entry-has-unknown-version', $news->Version;
        }
    }

    # We have to decode into UTF-8 to get the right length for the length
    # check.  For some reason, use open ':utf8' isn't sufficient.  If the
    # changelog uses a non-UTF-8 encoding, this will mangle it, but it doesn't
    # matter for the length check.
    #
    # Parse::DebianChangelog adds an additional space to the beginning of each
    # line, so we have to adjust for that in the length check.
    my @lines = split ("\n", decode ('utf-8', $changes));
    for my $i (0 .. $#lines) {
        if (length($lines[$i]) > 81
            and $lines[$i] !~ /^[\s.o*+-]*(?:[Ss]ee:?\s+)?\S+$/) {
            tag 'debian-changelog-line-too-long', 'line ' . ($i + 1);
        }
    }

    # Strip out all lines that contain the word spelling to avoid false
    # positives on changelog entries for spelling fixes.
    $changes =~ s/^.*spelling.*\n//gm;
    check_spelling('spelling-error-in-changelog', $changes, undef, { $pkg => 1});
}

# read the changelog itself
#
# emacs only looks at the last "local variables" in a file, and only at
# one within 3000 chars of EOF and on the last page (^L), but that's a bit
# pesky to replicate.  Demanding a match of $prefix and $suffix ought to
# be enough to avoid false positives.
open (IN, '<', 'changelog')
    or fail("cannot find changelog for $type package $pkg");
my ($prefix, $suffix);
while (<IN>) {

    if (/closes:\s*(((?:bug)?\#?\s?\d*)[[:alpha:]]\w*)/io
        || /closes:\s*(?:bug)?\#?\s?\d+
              (?:,\s*(?:bug)?\#?\s?\d+)*
              (?:,\s*(((?:bug)?\#?\s?\d*)[[:alpha:]]\w*))/iox) {
        tag 'wrong-bug-number-in-closes', "l$.:$1" if $2;
    }

    if (/^(.*)Local\ variables:(.*)$/i) {
        $prefix = $1;
        $suffix = $2;
    }
    # emacs allows whitespace between prefix and variable, hence \s*
    if (defined $prefix && defined $suffix
        && /^\Q$prefix\E\s*add-log-mailing-address:.*\Q$suffix\E$/) {
        tag 'debian-changelog-file-contains-obsolete-user-emacs-settings';
    }
}
close IN;

}

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
