# scripts -- lintian check script -*- perl -*-
#
# This is probably the right file to add a check for the use of
# set -e in bash and sh scripts.
#
# Copyright (C) 1998 Richard Braakman
# Copyright (C) 2002 Josip Rodin
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

package Lintian::scripts;
use strict;
use warnings;

use Util;

use Lintian::Check qw($known_shells_regex);
use Lintian::Data;
use Lintian::Relation;
use Lintian::Tags qw(tag);

sub _parse_interpreters {
    my ($interpreter, $value) = @_;
    my ($path, $dep) = split m/\s*,\s*/, $value, 2;
    $dep = $interpreter if not $dep;
    $dep = '' if $dep eq '@NODEPS@';
    return [$path, $dep];
}

# This is a map of all known interpreters.  The key is the interpreter
# name (the binary invoked on the #! line).  The value is an anonymous
# array of two elements.  The first argument is the path on a Debian
# system where that interpreter would be installed.  The second
# argument is the dependency that provides that interpreter.
#
# $INTERPRETERS maps names of (unversioned) interpreters to the path
# they are installed and what package to depend on to use them.
#
my $INTERPRETERS = Lintian::Data->new ('scripts/interpreters', qr/\s*=\>\s*/o,
                                       \&_parse_interpreters);


# The more complex case of interpreters that may have a version number.
#
# This is a hash from the base interpreter name to a list.  The base
# interpreter name may appear by itself or followed by some combination of
# dashes, digits, and periods.  The values are the directory in which the
# interpreter is found, the dependency to add for a version-less interpreter,
# a regular expression to match versioned interpreters and extract the version
# number, the package dependency for a versioned interpreter, and the list of
# known versions.
#
# An interpreter with a version must have a dependency on the specific package
# formed by taking the fourth element of the list and replacing $1 with the
# version number.  An interpreter without a version is rejected if the second
# element is undef; otherwise, the package must satisfy a dependency on the
# disjunction of the second argument (if non-empty) and all the packages
# formed by taking the list of known versions (the fifth element and on) and
# replacing $1 in the fourth argument with them.
#
# For example:
#
#    lua => [ '/usr/bin', 'lua', qr/^lua([\d.]+)$/, 'lua$1', qw(40 50 5.1) ]
#
# says that any lua interpreter must be in /usr/bin, a package using
# /usr/bin/lua50 must depend on lua50, and a package using just /usr/bin/lua
# must satisfy lua | lua40 | lusa50 | lua5.1.
#
# The list of known versions is the largest maintenance headache here, but
# it's only used for the unversioned dependency handling, and then only when
# someone uses the unversioned script but depends on a specific version for
# some reason.  So it's not a huge problem if it's a little out of date.
my %versioned_interpreters =
    (guile   => [ '/usr/bin', 'guile',
                  qr/^guile-([\d.]+)$/, 'guile-$1', qw(1.6 1.8)
                ],
     jruby   => [ '/usr/bin', 'jruby',
                  qr/^jruby([\d.]+)$/, 'jruby$1', qw(1.0 1.1 1.2)
                ],
     lua     => [ '/usr/bin', 'lua',
                  qr/^lua([\d.]+)$/, 'lua$1', qw(40 50 5.1)
                ],
     octave  => [ '/usr/bin', 'octave',
                  qr/^octave([\d.]+)$/, 'octave$1', qw(3.0 3.2)
                ],
     php     => [ '/usr/bin', '',
                  qr/^php(\d+)$/, 'php$1-cli', qw(5)
                ],
     pike    => [ '/usr/bin', '',
                  qr/^pike([\d.]+)$/, 'pike$1 | pike$1-core', qw(7.6 7.8)
                ],
     python  => [ '/usr/bin', undef,
                  qr/^python([\d.]+)$/, 'python$1 | python$1-minimal',
                  qw(2.4 2.5 2.6)
                ],
     rackup  => [ '/usr/bin', undef,
                  qr/^rackup([\d.]+)$/, 'librack-ruby$1', qw(1.8 1.9)
                ],
     ruby    => [ '/usr/bin', undef,
                  qr/^ruby([\d.]+)$/, 'ruby$1', qw(1.8 1.9)
                ],
     runghc  => [ '/usr/bin', 'ghc',
                  qr/^runghc(\d+)$/, 'ghc$1', qw(6)
                ],
     scsh    => [ '/usr/bin', 'scsh',
                  qr/^scsh-([\d.]+)$/, 'scsh-$1', qw(0.6)
                ],
     tclsh   => [ '/usr/bin', 'tclsh | tcl',
                  qr/^tclsh([\d.]+)$/, 'tcl$1', qw(8.3 8.4 8.5 8.6)
                ],
     wish    => [ '/usr/bin', 'wish | tk',
                  qr/^wish([\d.]+)$/, 'tk$1', qw(8.3 8.4 8.5 8.6)
                ],
    );

# Any of the following packages can satisfy an update-inetd dependency.
my $update_inetd
    = join (' | ', qw(update-inetd inet-superserver openbsd-inetd
                      inetutils-inetd rlinetd xinetd));

# Appearance of one of these regexes in a maintainer script means that there
# must be a dependency (or pre-dependency) on the given package.  The tag
# reported is maintainer-script-needs-depends-on-%s, so be sure to update
# scripts.desc when adding a new rule.
my @depends_needed = (
        [ adduser       => '\badduser\s'           ],
        [ gconf2        => '\bgconf-schemas\s'     ],
        [ $update_inetd => '\bupdate-inetd\s'      ],
        [ ucf           => '\bucf\s'               ],
        [ 'xml-core'    => '\bupdate-xmlcatalog\s' ],
);

# When detecting commands inside shell scripts, use this regex to match the
# beginning of the command rather than checking whether the command is at the
# beginning of a line.
my $LEADIN = qr'(?:(?:^|[`&;(|{])\s*|(?:if|then|do|while)\s+)';

my @bashism_single_quote_regexs = (
    $LEADIN . qr'echo\s+(?:-[^e\s]+\s+)?\'[^\']*(\\[abcEfnrtv0])+.*?[\']',
        # unsafe echo with backslashes
    $LEADIN . qr'source\s+[\"\']?(?:\.\/|\/|\$|[\w~.-])\S*',
        # should be '.', not 'source'
);
my @bashism_string_regexs = (
    qr'\$\[\w+\]',               # arith not allowed
    qr'\$\{\w+\:\d+(?::\d+)?\}',   # ${foo:3[:1]}
    qr'\$\{\w+(/.+?){1,2}\}',    # ${parm/?/pat[/str]}
    qr'\$\{\#?\w+\[[0-9\*\@]+\]\}',# bash arrays, ${name[0|*|@]}
    qr'\$\{!\w+[\@*]\}',                 # ${!prefix[*|@]}
    qr'\$\{!\w+\}',              # ${!name}
    qr'(\$\(|\`)\s*\<\s*\S+\s*(\)|\`)', # $(\< foo) should be $(cat foo)
    qr'\$\{?RANDOM\}?\b',                # $RANDOM
    qr'\$\{?(OS|MACH)TYPE\}?\b',   # $(OS|MACH)TYPE
    qr'\$\{?HOST(TYPE|NAME)\}?\b', # $HOST(TYPE|NAME)
    qr'\$\{?DIRSTACK\}?\b',        # $DIRSTACK
    qr'\$\{?EUID\}?\b',            # $EUID should be "id -u"
    qr'\$\{?UID\}?\b',           # $UID should be "id -ru"
    qr'\$\{?SECONDS\}?\b',       # $SECONDS
    qr'\$\{?BASH_[A-Z]+\}?\b',     # $BASH_SOMETHING
    qr'\$\{?SHELLOPTS\}?\b',       # $SHELLOPTS
    qr'\$\{?PIPESTATUS\}?\b',      # $PIPESTATUS
    qr'\$\{?SHLVL\}?\b',                 # $SHLVL
    qr'<<<',                       # <<< here string
    $LEADIN . qr'echo\s+(?:-[^e\s]+\s+)?\"[^\"]*(\\[abcEfnrtv0])+.*?[\"]',
        # unsafe echo with backslashes
);
my @bashism_regexs = (
    qr'(?:^|\s+)function \w+(\s|\(|\Z)',  # function is useless
    qr'(test|-o|-a)\s*[^\s]+\s+==\s', # should be 'b = a'
    qr'\[\s+[^\]]+\s+==\s',        # should be 'b = a'
    qr'\s(\|\&)',                        # pipelining is not POSIX
    qr'[^\\\$]\{(?:[^\s\\\}]*?,)+[^\\\}\s]*\}', # brace expansion
    qr'(?:^|\s+)\w+\[\d+\]=',      # bash arrays, H[0]
    $LEADIN . qr'read\s+(?:-[a-qs-zA-Z\d-]+)',
        # read with option other than -r
    $LEADIN . qr'read\s*(?:-\w+\s*)*(?:\".*?\"|[\'].*?[\'])?\s*(?:;|$)',
        # read without variable
    qr'\&>',                     # cshism
    qr'(<\&|>\&)\s*((-|\d+)[^\s;|)`&\\\\]|[^-\d\s]+)', # should be >word 2>&1
    qr'\[\[(?!:)',               # alternative test command
    $LEADIN . qr'select\s+\w+',    # 'select' is not POSIX
    $LEADIN . qr'echo\s+(-n\s+)?-n?en?',  # echo -e
    $LEADIN . qr'exec\s+-[acl]',   # exec -c/-l/-a name
    qr'(?:^|\s+)let\s',          # let ...
    qr'(?<![\$\(])\(\(.*\)\)',     # '((' should be '$(('
    qr'\$\[[^][]+\]',            # '$[' should be '$(('
    qr'(\[|test)\s+-a',          # test with unary -a (should be -e)
    qr'/dev/(tcp|udp)',          # /dev/(tcp|udp)
    $LEADIN . qr'\w+\+=',                # should be "VAR="${VAR}foo"
    $LEADIN . qr'suspend\s',
    $LEADIN . qr'caller\s',
    $LEADIN . qr'complete\s',
    $LEADIN . qr'compgen\s',
    $LEADIN . qr'declare\s',
    $LEADIN . qr'typeset\s',
    $LEADIN . qr'disown\s',
    $LEADIN . qr'builtin\s',
    $LEADIN . qr'set\s+-[BHT]+',   # set -[BHT]
    $LEADIN . qr'alias\s+-p',      # alias -p
    $LEADIN . qr'unalias\s+-a',    # unalias -a
    $LEADIN . qr'local\s+-[a-zA-Z]+', # local -opt
    qr'(?:^|\s+)\s*\(?\w*[^\(\w\s]+\S*?\s*\(\)\s*([\{|\(]|\Z)',
        # function names should only contain [a-z0-9_]
    $LEADIN . qr'(push|pop)d(\s|\Z)',   # (push|pod)d
    $LEADIN . qr'export\s+-[^p]',  # export only takes -p as an option
    $LEADIN . qr'ulimit(\s|\Z)',
    $LEADIN . qr'shopt(\s|\Z)',
    $LEADIN . qr'type\s',
    $LEADIN . qr'time\s',
    $LEADIN . qr'dirs(\s|\Z)',
    qr'(?:^|\s+)[<>]\(.*?\)',      # <() process substituion
    qr'(?:^|\s+)readonly\s+-[af]', # readonly -[af]
    $LEADIN . qr'(sh|\$\{?SHELL\}?) -[rD]', # sh -[rD]
    $LEADIN . qr'(sh|\$\{?SHELL\}?) --\w+', # sh --long-option
    $LEADIN . qr'(sh|\$\{?SHELL\}?) [-+]O', # sh [-+]O
);

# a local function to help use separate tags for example scripts
sub script_tag {
    my( $tag, $filename, @rest ) = @_;

    $tag = "example-$tag"
        if $filename and $filename =~ m,usr/share/doc/[^/]+/examples/,;

    tag( $tag, $filename, @rest );
}

sub run {

my %executable = ();
my %ELF = ();
my %scripts = ();

# no dependency for install-menu, because the menu package specifically
# says not to depend on it.

my $pkg = shift;
my $type = shift;
my $info = shift;

foreach (@{$info->sorted_index}) {
    next if $_ eq '';
    my $index_info = $info->index->{$_};
    my $operm = $index_info->{operm};
    next unless $index_info->{type} =~ m,^[-h], and ($operm & 0111);
    my $is_suid = $operm & 04000;
    $executable{$_} = 1;
}

for my $file (@{$info->sorted_file_info}) {
    $ELF{$file} = 1 if $info->file_info->{$file} =~ /^[^,]*\bELF\b/o;
}

my $all_deps = '';
for my $field (qw/suggests recommends depends pre-depends provides/) {
    if (defined $info->field($field)) {
        $all_deps .= ', ' if $all_deps;
        $all_deps .= $info->field($field);
    }
}
$all_deps .= ', ' if $all_deps;
$all_deps .= $pkg;
my $all_parsed = Lintian::Relation->new($all_deps);
my $str_deps = $info->relation('strong');


for my $filename (sort keys %{$info->scripts}) {
    my $interpreter = $info->scripts->{$filename}->{interpreter};
    my $calls_env = $info->scripts->{$filename}->{calls_env};
    my $path;
    $scripts{$filename} = 1;

    my $in_docs = $filename =~ m,usr/share/doc/,;
    my $in_examples = $filename =~ m,usr/share/doc/[^/]+/examples/,;

    # no checks necessary at all for scripts in /usr/share/doc/
    # unless they are examples
    next if $in_docs and !$in_examples;

    my ($base) = $interpreter =~ m,([^/]*)$,;

    # allow exception for .in files that have stuff like #!@PERL@
    next if ($filename =~ m,\.in$, and $interpreter =~ m,^(\@|<\<)[A-Z_]+(\@|>\>)$,);

    my $is_absolute = ($interpreter =~ m,^/, or defined $calls_env);

    # Skip files that have the #! line, but are not executable and do not have
    # an absolute path and are not in a bin/ directory (/usr/bin, /bin etc)
    # They are probably not scripts after all.
    next if ($filename !~ m,(bin/|etc/init\.d/), and !$executable{$filename}
             and !$is_absolute and !$in_examples);

    # Example directories sometimes contain Perl libraries, and some people
    # use initial lines like #!perl or #!python to provide editor hints, so
    # skip those too if they're not executable.  Be conservative here, since
    # it's not uncommon for people to both not set examples executable and not
    # fix the path and we want to warn about that.
    next if ($filename =~ /\.pm\z/ and !$executable{$filename}
             and !$is_absolute and $in_examples);

    if ($interpreter eq '') {
        script_tag('script-without-interpreter', $filename);
        next;
    }

    # Either they use an absolute path or they use '/usr/bin/env interp'.
    script_tag('interpreter-not-absolute', $filename, "#!$interpreter")
        unless $is_absolute;
    tag 'script-not-executable', $filename
        unless ($executable{$filename}
                or $filename =~ m,^usr/(lib|share)/.*\.pm,
                or $filename =~ m,^usr/(lib|share)/.*\.py,
                or $filename =~ m,^usr/(lib|share)/ruby/.*\.rb,
                or $filename =~ m,\.in$,
                or $filename =~ m,\.erb$,
                or $filename =~ m,\.ex$,
                or $filename eq 'etc/init.d/skeleton'
                or $filename =~ m,^etc/menu-methods,
                or $filename =~ m,^etc/X11/Xsession\.d,)
                or $in_docs;

    # Warn about csh scripts.
    tag 'csh-considered-harmful', $filename
        if (($base eq 'csh' or $base eq 'tcsh')
            and $executable{$filename}
            and $filename !~ m,^etc/csh/login\.d/,)
            and !$in_docs;

    $path = $info->unpacked($filename);
    # Syntax-check most shell scripts, but don't syntax-check scripts that end
    # in .dpatch.  bash -n doesn't stop checking at exit 0 and goes on to blow
    # up on the patch itself.
    if ($base =~ /^$known_shells_regex$/) {
        if (-x $interpreter
            and ! script_is_evil_and_wrong($path)
            and $filename !~ m,\.dpatch$,
            and $filename !~ m,\.erb$,
            # exclude some shells. zsh -n is broken, see #485885
            and $base !~ m/^(z|t?c)sh$/) {

            if (check_script_syntax($interpreter, $path)) {
                script_tag('shell-script-fails-syntax-check', $filename);
            }
        }
    }

    # Try to find the expected path of the script to check.  First check
    # $INTERPRETERS and %versioned_interpreters.  If not found there, see if
    # it ends in a version number and the base is found in
    # %versioned_interpreters.
    my $data = $INTERPRETERS->value ($base);
    my $versioned = 0;
    if (not defined $data) {
        $data = $versioned_interpreters{$base};
        undef $data if ($data and not defined ($data->[1]));
        if (not defined ($data) and $base =~ /^(.*[^\d.-])-?[\d.]+$/) {
            $data = $versioned_interpreters{$1};
            undef $data unless ($data and $base =~ /$data->[2]/);
        }
        $versioned = 1 if $data;
    }
    if ($data) {
        my $expected = $data->[0] . '/' . $base;
        unless ($interpreter eq $expected or defined $calls_env) {
            script_tag('wrong-path-for-interpreter', $filename,
                "(#!$interpreter != $expected)");
        }
    } elsif ($interpreter =~ m,/usr/local/,) {
        script_tag('interpreter-in-usr-local', $filename, "#!$interpreter");
    } elsif ($executable{'.' . $interpreter}) {
        # Package installs the interpreter itself, so it's probably ok.  Don't
        # emit any tag for this.
    } elsif ($interpreter eq '/bin/env') {
        script_tag('script-uses-bin-env', $filename);
    } else {
        script_tag('unusual-interpreter', $filename, "#!$interpreter");
    }

    # Check for obsolete perl libraries
    if ($base eq 'perl' &&
             !$str_deps->implies ('libperl4-corelibs-perl | perl (<< 5.12.3-7)')) {
        open(FH, '<', $path) or fail("could not open script $path");
        while (<FH>) {
            if (/(?:do|require)\s+(?:'|")(abbrev|assert|bigfloat|bigint|bigrat|cacheout|complete|ctime|dotsh|exceptions|fastcwd|find|finddepth|flush|getcwd|getopt|getopts|hostname|importenv|look|newgetopt|open2|open3|pwd|shellwords|stat|syslog|tainted|termcap|timelocal|validate)\.pl(?:'|")/) {
                tag 'script-uses-perl4-libs-without-dep', "$filename:$. ${1}.pl";
            }
        }
        close(FH);
    }

    # If we found the interpreter and the script is executable, check
    # dependencies.  This should be the last thing we do in the loop so that
    # we can use next for an early exit and reduce the nesting.
    next unless ($data && $executable{$filename} and !$in_docs);
    if (!$versioned) {
        my $depends = $data->[1];
        if (not defined $depends) {
            $depends = $base;
        }
        if ($depends && !$all_parsed->implies($depends)) {
            if ($base =~ /^(python|ruby|(m|g)awk)$/) {
                tag("$base-script-but-no-$base-dep", $filename);
            } elsif ($base eq 'csh' && $filename =~ m,^etc/csh/login\.d/,) {
                # Initialization files for csh.
            } elsif ($base eq 'fish' && $filename =~ m,^etc/fish\.d/,) {
                # Initialization files for fish.
            } elsif ($base eq 'ocamlrun' && $all_deps =~ /\bocaml(-base)?(-nox)?-\d\.[\d.]+/) {
                # ABI-versioned virtual packages for ocaml
            } else {
                tag 'missing-dep-for-interpreter', "$base => $depends",
                    "($filename)";
            }
        }
    } elsif ($versioned_interpreters{$base}) {
        my @versions = @$data[4 .. @$data - 1];
        my @depends = map {
            my $d = $data->[3];
            $d =~ s/\$1/$_/g;
            $d;
        } @versions;
        unshift (@depends, $data->[1]) if length $data->[1];
        my $depends = join (' | ',  @depends);
        unless ($all_parsed->implies($depends)) {
            if ($base eq 'php') {
                tag 'php-script-but-no-phpX-cli-dep', $filename;
            } elsif ($base =~ /^(wish|tclsh)/) {
                tag "$1-script-but-no-$1-dep", $filename;
            } else {
                tag 'missing-dep-for-interpreter', "$base => $depends",
                    "($filename)";
            }
        }
    } else {
        my ($version) = ($base =~ /$data->[2]/);
        my $depends = $data->[3];
        $depends =~ s/\$1/$version/g;
        unless ($all_parsed->implies($depends)) {
            if ($base =~ /^php/) {
                tag 'php-script-but-no-phpX-cli-dep', $filename;
            } elsif ($base =~ /^(python|ruby)/) {
                tag "$1-script-but-no-$1-dep", $filename;
            } else {
                tag 'missing-dep-for-interpreter', "$base => $depends",
                    "($filename)";
            }
        }
    }
}

foreach (keys %executable) {
    tag 'executable-not-elf-or-script', $_
        unless ( $ELF{$_}
                 or $scripts{$_}
                 or $_ =~ m,^usr(?:/X11R6)?/man/,
                 or $_ =~ m/\.exe$/ # mono convention
                 );
}

open(SCRIPTS, '<', 'control-scripts')
    or fail("cannot open lintian control-scripts file: $!");

# Handle control scripts.  This is an edited version of the code for
# normal scripts above, because there were just enough differences to
# make a shared function awkward.

my %added_diversions;
my %removed_diversions;
my $expand_diversions = 0;
while (<SCRIPTS>) {
    chop;

    m/^(\S*) (.*)$/ or fail("bad line in control-scripts file: $_");
    my $interpreter = $1;
    my $file = $2;
    my $filename = $info->control($file);

    $interpreter =~ m|([^/]*)$|;
    my $base = $1;

    if ($interpreter eq '') {
        tag 'script-without-interpreter', "control/$file";
        next;
    }

    tag 'interpreter-not-absolute', "control/$file", "#!$interpreter"
        unless ($interpreter =~ m|^/|);

    if ($interpreter =~ m|/usr/local/|) {
        tag 'control-interpreter-in-usr-local', "control/$file", "#!$interpreter";
    } elsif ($base eq 'sh' or $base eq 'bash' or $base eq 'perl') {
        my $expected = ($INTERPRETERS->value ($base))->[0] . '/' . $base;
        tag 'wrong-path-for-interpreter', "#!$interpreter != $expected",
            "(control/$file)"
            unless ($interpreter eq $expected);
    } elsif ($file eq 'config') {
        tag 'forbidden-config-interpreter', "#!$interpreter";
    } elsif ($file eq 'postrm') {
        tag 'forbidden-postrm-interpreter', "#!$interpreter";
    } elsif ($INTERPRETERS->known ($base)) {
        my $data = $INTERPRETERS->value ($base);
        my $expected = $data->[0] . '/' . $base;
        unless ($interpreter eq $expected) {
            tag 'wrong-path-for-interpreter', "#!$interpreter != $expected",
                "(control/$file)"
        }
        tag 'unusual-control-interpreter', "control/$file", "#!$interpreter";

        # Interpreters used by preinst scripts must be in Pre-Depends.
        # Interpreters used by postinst or prerm scripts must be in Depends.
        unless (defined ($data->[1]) and not $data->[1]) {
            my $depends = Lintian::Relation->new($data->[1] || $base);
            if ($file eq 'preinst') {
                unless ($info->relation('pre-depends')->implies($depends)) {
                    tag 'preinst-interpreter-without-predepends',
                        "#!$interpreter"
                }
            } else {
                unless ($info->relation('strong')->implies($depends)) {
                    tag 'control-interpreter-without-depends', "control/$file",
                        "#!$interpreter"
                }
            }
        }
    } else {
        tag 'unknown-control-interpreter', "control/$file", "#!$interpreter";
        next; # no use doing further checks if it's not a known interpreter
    }

    # perhaps we should warn about *csh even if they're somehow screwed,
    # but that's not really important...
    tag 'csh-considered-harmful', "control/$file"
        if ($base eq 'csh' or $base eq 'tcsh');

    my $shellscript = $base =~ /^$known_shells_regex$/ ? 1 : 0;

    # Only syntax-check scripts we can check with bash.
    my $checkbashisms;
    if ($shellscript) {
        $checkbashisms = $base eq 'sh' ? 1 : 0;
        if ($base eq 'sh' or $base eq 'bash') {
            if (check_script_syntax('/bin/bash', $filename)) {
                tag 'maintainer-shell-script-fails-syntax-check', $file;
            }
        }
    }

    # now scan the file contents themselves
    open (C, '<', $filename)
        or fail("cannot open maintainer script $filename for reading: $!");

    my %warned;
    my ($saw_init, $saw_invoke, $saw_debconf, $saw_bange, $saw_sete, $has_code);
    my $cat_string = '';

    my $previous_line = '';
    while (<C>) {
        if ($. == 1 && $shellscript && m,/$base\s*.*\s-\w*e\w*\b,) {
            $saw_bange = 1;
        }

        next if m,^\s*$,;  # skip empty lines
        next if m,^\s*\#,; # skip comment lines
        $_ = remove_comments($_);

        # Concatenate lines containing continuation character (\) at the end
        if ($shellscript && /\\$/) {
            s/\\//;
            chomp;
            $previous_line .= $_;
            next;
        }

        chomp;
        $_ = $previous_line . $_;
        $previous_line = '';

        # Don't consider the standard dh-make boilerplate to be code.  This
        # means ignoring the framework of a case statement, the labels, the
        # echo complaining about unknown arguments, and an exit.
        unless ($has_code
                || m/^\s*set\s+-\w+\s*$/
                || m/^\s*case\s+\"?\$1\"?\s+in\s*$/
                || m/^\s*(?:[a-z|-]+|\*)\)\s*$/
                || m/^\s*[:;]+\s*$/
                || m/^\s*echo\s+\"[^\"]+\"(?:\s*>&2)?\s*$/
                || m/^\s*esac\s*$/
                || m/^\s*exit\s+\d+\s*$/) {
            $has_code = 1;
        }

        if ($shellscript && m,${LEADIN}set\s*(\s+-(-.*|[^e]+))*\s-\w*e,) {
            $saw_sete = 1;
        }

        if (m,[^\w]((/var)?/tmp|\$TMPDIR)/[^)\]}\s], and not m/\bmks?temp\b/ and not m/\btempfile\b/ and not m/\bmkdir\b/ and not m/\$RANDOM/) {
            tag 'possibly-insecure-handling-of-tmp-files-in-maintainer-script', "$file:$."
                unless $warned{tmp};
            $warned{tmp} = 1;
        }
        if (m/^\s*killall(?:\s|\z)/) {
            tag 'killall-is-dangerous', "$file:$." unless $warned{killall};
            $warned{killall} = 1;
        }
        if (m/^\s*mknod(?:\s|\z)/ and not m/\sp\s/) {
            tag 'mknod-in-maintainer-script', "$file:$.";
        }

        # Collect information about init script invocations to catch running
        # init scripts directly rather than through invoke-rc.d.  Since the
        # script is allowed to run the init script directly if invoke-rc.d
        # doesn't exist, only tag direct invocations where invoke-rc.d is
        # never used in the same script.  Lots of false negatives, but
        # hopefully not many false positives.
        if (m%^\s*/etc/init\.d/(\S+)\s+[\"\']?(\S+)[\"\']?%) {
            $saw_init = $.;
        }
        if (m%^\s*invoke-rc\.d\s+%) {
            $saw_invoke = $.;
        }

        if ($shellscript) {
            if ($cat_string ne '' and m/^\Q$cat_string\E$/) {
                $cat_string = '';
            }
            my $within_another_shell = 0;
            if (m,(?:^|\s+)(?:(?:/usr)?/bin/)?($known_shells_regex)\s+-c\s*.+,
                and $1 ne 'sh') {
                $within_another_shell = 1;
            }
            # if cat_string is set, we are in a HERE document and need not
            # check for things
            if ($cat_string eq '' and $checkbashisms and !$within_another_shell) {
                my $found = 0;
                my $match = '';

                # since this test is ugly, I have to do it by itself
                # detect source (.) trying to pass args to the command it runs
                # The first expression weeds out '. "foo bar"'
                if (not $found and
                    not m/^\s*\.\s+(\"[^\"]+\"|\'[^\']+\')\s*(\&|\||\d?>|<|;|\Z)/
                    and m/^\s*(\.\s+[^\s;\`:]+\s+([^\s;]+))/) {

                    my $extra;
                    ($match, $extra) = ($1, $2);
                    if ($extra =~ /^(\&|\||\d?>|<)/) {
                        # everything is ok
                        ;
                    } else {
                        $found = 1;
                    }
                }

                my $line = $_;

                unless ($found) {
                    for my $re (@bashism_single_quote_regexs) {
                        if ($line =~ m/($re)/) {
                            $found = 1;
                            ($match) = m/($re)/;
                            last;
                        }
                    }
                }

                # Ignore anything inside single quotes; it could be an
                # argument to grep or the like.

                # $cat_line contains the version of the line we'll check
                # for heredoc delimiters later. Initially, remove any
                # spaces between << and the delimiter to make the following
                # updates to $cat_line easier.
                my $cat_line = $line;
                $cat_line =~ s/(<\<-?)\s+/$1/g;

                # Remove single quoted strings, with the exception that we
                # don't remove the string
                # if the quote is immediately preceeded by a < or a -, so we
                # can match "foo <<-?'xyz'" as a heredoc later
                # The check is a little more greedy than we'd like, but the
                # heredoc test itself will weed out any false positives
                $cat_line =~ s/(^|[^<\\\"-](?:\\\\)*)\'(?:\\.|[^\\\'])+\'/$1''/g;

                unless ($found) {
                    # Remove "quoted quotes". They're likely to be inside
                    # another pair of quotes; we're not interested in
                    # them for their own sake and removing them makes finding
                    # the limits of the outer pair far easier.
                    $line =~ s/(^|[^\\\'\"])\"\'\"/$1/g;
                    $line =~ s/(^|[^\\\'\"])\'\"\'/$1/g;

                    $line =~ s/(^|[^\\\"](?:\\\\)*)\'(?:\\.|[^\\\'])+\'/$1''/g;
                    for my $re (@bashism_string_regexs) {
                        if ($line =~ m/($re)/) {
                            $found = 1;
                            ($match) = m/($re)/;
                            last;
                        }
                    }
                }

                # We've checked for all the things we still want to notice in
                # double-quoted strings, so now remove those strings as well.
                $cat_line =~ s/(^|[^<\\\'-](?:\\\\)*)\"(?:\\.|[^\\\"])+\"/$1""/g;
                unless ($found) {
                    $line =~ s/(^|[^\\\'](?:\\\\)*)\"(?:\\.|[^\\\"])+\"/$1""/g;
                    for my $re (@bashism_regexs) {
                        if ($line =~ m/($re)/) {
                            $found = 1;
                            ($match) = m/($re)/;
                            last;
                        }
                    }
                }

                if ($found) {
                    tag 'possible-bashism-in-maintainer-script', "$file:$. \'$match\'";
                }

                # Only look for the beginning of a heredoc here, after we've
                # stripped out quoted material, to avoid false positives.
                if ($cat_line =~ m/(?:^|[^<])\<\<\-?\s*(?:[\\]?(\w+)|[\'\"](.*?)[\'\"])/) {
                    $cat_string = $1;
                    $cat_string = $2 if not defined $cat_string;
                }
            }
            if (!$cat_string) {
                if (/^\s*start-stop-daemon\s+/ && !/\s--stop\b/) {
                    tag 'start-stop-daemon-in-maintainer-script', "$file:$.";
                }
                # Don't use chown foo.bar
                if (/(chown(\s+--?[A-Za-z-]+)*\s+[-_A-Za-z0-9]+\.[-_A-Za-z0-9]+)\s+/) {
                    tag 'deprecated-chown-usage', "$file:$. \'$1\'";
                }
                if (/invoke-rc.d.*\|\| exit 0/) {
                    tag 'maintainer-script-hides-init-failure', "$file:$.";
                }
                if (m,/usr/share/debconf/confmodule,) {
                    $saw_debconf = 1;
                }
                if (m/^\s*read(?:\s|\z)/ && !$saw_debconf) {
                    tag 'read-in-maintainer-script', "$file:$.";
                }
                if (m,^\s*rm\s+([^>]*\s)?/dev/,) {
                    tag 'maintainer-script-removes-device-files', "$file:$.";
                }
                if (m,>\s*(/etc/(?:services|protocols|rpc))(\s|\Z),) {
                    tag 'maintainer-script-modifies-netbase-managed-file', "$file:$. $1";
                }
                if (m,^\s*(?:cp|mv)\s.*(/etc/(?:services|protocols|rpc))\s*$,) {
                    tag 'maintainer-script-modifies-netbase-managed-file', "$file:$. $1";
                }
                if (m,>\s*/etc/inetd\.conf(\s|\Z),) {
                    tag 'maintainer-script-modifies-inetd-conf', "$file:$."
                        unless $info->relation('provides')->implies('inet-superserver');
                }
                if (m,^\s*(?:cp|mv)\s+(?:.*\s)?/etc/inetd\.conf\s*$,) {
                    tag 'maintainer-script-modifies-inetd-conf', "$file:$."
                        unless $info->relation('provides')->implies('inet-superserver');
                }
                if (m,>\s*/etc/ld\.so\.conf(\s|\Z),) {
                    tag 'maintainer-script-modifies-ld-so-conf', "$file:$."
                        unless $pkg =~ /^libc/;
                }
                if (m,^\s*(?:cp|mv)\s+(?:.*\s)?/etc/ld\.so\.conf\s*$,) {
                    tag 'maintainer-script-modifies-ld-so-conf', "$file:$."
                        unless $pkg =~ /^libc/;
                }

                # Ancient dpkg feature tests.
                if (m/${LEADIN}dpkg\s+--assert-support-predepends\b/) {
                    tag 'ancient-dpkg-predepends-check', "$file:$.";
                }
                if (m/${LEADIN}dpkg\s+--assert-working-epoch\b/) {
                    tag 'ancient-dpkg-epoch-check', "$file:$.";
                }
                if (m/${LEADIN}dpkg\s+--assert-long-filenames\b/) {
                    tag 'ancient-dpkg-long-filenames-check', "$file:$.";
                }
                if (m/${LEADIN}dpkg\s+--assert-multi-conrep\b/) {
                    tag 'ancient-dpkg-multi-conrep-check', "$file:$.";
                }

                # Commands that should not be used in maintainer scripts.
                if (m,${LEADIN}(?:/usr/bin/)?fc-cache(\s|\Z),) {
                    tag 'fc-cache-used-in-maintainer-script', "$file:$.";
                }

                # Check for running commands with a leading path.
                #
                # Unfortunately, our $LEADIN string doesn't work well for this
                # in the presence of commands that contain backquoted
                # expressions because it can't tell the difference between the
                # initial backtick and the closing backtick.  We therefore
                # first extract all backquoted expressions and check them
                # separately, and then remove them from a copy of a string and
                # then check it for bashisms.
                while (m,\`([^\`]+)\`,g) {
                    my $cmd = $1;
                    if ($cmd =~ m,$LEADIN(/(usr/)?s?bin/[\w.+-]+)(\s|;|\z),) {
                        tag 'command-with-path-in-maintainer-script',
                            "$file:$. $1";
                    }
                }
                my $cmd = $_;
                $cmd =~ s/\`[^\`]+\`//g;
                if ($cmd =~ m,$LEADIN(/(?:usr/)?s?bin/[\w.+-]+)(?:\s|;|$),) {
                    tag 'command-with-path-in-maintainer-script', "$file:$. $1";
                }
            }
        }
        if (m,\bsuidregister\b,) {
            tag 'suidregister-used-in-maintainer-script', $file;
        }
        if ($file eq 'preinst') {
            if (m/^\s*dpkg-maintscript-helper(?:\s|\z)/ &&
                    !$info->relation('pre-depends')->implies('dpkg (>= 1.15.7.2~)')) {
                tag 'preinst-uses-dpkg-maintscript-helper-without-predepends', "$file:$.";
            }
        }
        if ($file eq 'postrm') {
            if (m,update\-alternatives \-\-remove,) {
                tag 'update-alternatives-remove-called-in-postrm';
            }
        } else {
            for my $rule (@depends_needed) {
                my ($package, $regex) = @$rule;
                if ($pkg ne $package and /$regex/ and ! $warned{$package}) {
                    if (m,-x\s+\S*$regex, or m,(which|type)\s+$regex, or m,command\s+.*?$regex,) {
                        $warned{$package} = 1;
                    } elsif (!/\|\|\s*true\b/) {
                        unless ($info->relation('strong')->implies($package)) {
                            my $shortpackage = $package;
                            $shortpackage =~ s/[ \(].*//;
                            tag "maintainer-script-needs-depends-on-$shortpackage", $file;
                            $warned{$package} = 1;
                        }
                    }
                }
            }
        }
        if (m,\bgconftool(-2)?(\s|\Z),) {
            tag 'gconftool-used-in-maintainer-script', "$file:$.";
        }
        if (m,\binstall-sgmlcatalog\b, && !(m,--remove, && ($file eq 'prerm' || $file eq 'postinst'))) {
            tag 'install-sgmlcatalog-deprecated', "$file:$.";
        }
        if (m,\binstall-info\b,) {
            tag 'install-info-used-in-maintainer-script', "$file:$.";
        }
        if (m,/var/lib/dpkg/status\b, && $pkg ne 'base-files' && $pkg ne 'dpkg') {
            tag 'maintainer-script-uses-dpkg-status-directly', $file;
        }
        if (m,$LEADIN(?:/usr/sbin/)?dpkg-divert\s, && ! /--(?:help|list|truename|version)/) {
            if (/--local/ or !/--package/) {
                tag 'package-uses-local-diversion', "$file:$.";
            }
            my $mode = /--remove/ ? 'remove' : 'add';
            my ($divert) = /dpkg-divert\s*(.*)$/;
            $divert =~ s/\s*(?:\${?[\w:=-]+}?)*\s*--(?:add|quiet|remove|rename|test|local|(:?admindir|divert|package)\s+\S+)\s*//g;
            # Remove unpaired opening or closing parenthesis
            1 while($divert =~ m/\G.*?\(.+?\)/gc);
            $divert =~ s/\G(.*?)[()]/$1/;
            pos($divert) = undef;
            # Remove unpaired opening or closing braces
            1 while($divert =~ m/\G.*?{.+?}/gc);
            $divert =~ s/\G(.*?)[{}]/$1/;
            pos($divert) = undef;

            # position after the last pair of quotation marks, if any
            1 while($divert =~ m/\G.*?(\"|\').+?\1/gc);
            # Strip anything matching and after '&&', '||', ';', or '>'
            # this is safe only after we are positioned after the last pair
            # of quotation marks
            $divert =~ s/\G.+?\K(?: && | \|\| | ; | \d*> ).*$//x;
            pos($divert) = undef;
            # Remove quotation marks, they affect:
            # * our var to regex trick
            # * stripping the initial slash if the path was quoted
            $divert =~ s/[\"\']//g;
            # remove the leading / because it's not in the index hash
            $divert =~ s,^/,,;

            # remove any remaining leading or trailing whitespace.
            $divert =~ s/^\s+//;
            $divert =~ s/\s+$//;

            $divert = quotemeta($divert);

            # For now just replace variables, they will later be normalised
            $expand_diversions = 1 if $divert =~ s/\\\$\w+/.+/g;
            $expand_diversions = 1 if $divert =~ s/\\\$\\{\w+.*?\\}/.+/g;
            # handle $() the same way:
            $expand_diversions = 1 if $divert =~ s/\\\$\\\(.+?\\\)/.+/g;

            if ($mode eq 'add') {
                $added_diversions{$divert} = {'script' => $file, 'line' => $.};
            } elsif ($mode eq 'remove') {
                push @{$removed_diversions{$divert}}, {'script' => $file, 'line' => $.};
            } else {
                fail "Internal error: \$mode has unknown value: $mode";
            }
        }
    }

    if ($saw_init && ! $saw_invoke) {
        tag 'maintainer-script-calls-init-script-directly', "$file:$saw_init";
    }
    unless ($has_code) {
        tag 'maintainer-script-empty', $file;
    }
    if ($shellscript && !$saw_sete) {
        if ($saw_bange) {
            tag 'maintainer-script-without-set-e', $file;
        } else {
            tag 'maintainer-script-ignores-errors', $file;
        }
    }

    close C;

}
close(SCRIPTS);

# If any of the maintainer scripts used a variable in the file or
# diversion name normalise them all
if ($expand_diversions) {
    for my $divert (keys %removed_diversions, keys %added_diversions) {

        # if a wider regex was found, the entries might no longer be there
        unless (exists($removed_diversions{$divert})
            or exists($added_diversions{$divert})) {
            next;
        }

        my $widerrx = $divert;
        my $wider = $widerrx;
        $wider =~ s/\\//g;

        # find the widest regex:
        my @matches = grep {
            my $lrx = $_;
            my $l = $lrx;
            $l =~ s/\\//g;

            if ($wider =~ m/^$lrx$/) {
                $widerrx = $lrx;
                $wider = $l;
                1;
            } elsif ($l =~ m/^$widerrx$/) {
                1;
            } else {
                0;
            }
        } (keys %removed_diversions, keys %added_diversions);

        # replace all the occurences with the widest regex:
        for my $k (@matches) {
            next if ($k eq $widerrx);

            if (exists($removed_diversions{$k})) {
                $removed_diversions{$widerrx} = $removed_diversions{$k};
                delete $removed_diversions{$k};
            }
            if (exists($added_diversions{$k})) {
                $added_diversions{$widerrx} = $added_diversions{$k};
                delete $added_diversions{$k};
            }
        }
    }
}

for my $divert (keys %removed_diversions) {
    if (exists $added_diversions{$divert}) {
        # just mark the entry, because a --remove might
        # happen in two branches in the script, i.e. we
        # see it twice, which is not a bug
        $added_diversions{$divert}{'removed'} = 1;
    } else {
        for my $item (@{$removed_diversions{$divert}}) {
            my $script = $item->{'script'};
            my $line = $item->{'line'};

            next unless ($script eq 'postrm');

            # Allow preinst and postinst to remove diversions the
            # package doesn't add to clean up after previous
            # versions of the package.

            $divert = unquote($divert, $expand_diversions);

            tag 'remove-of-unknown-diversion', $divert, "$script:$line";
        }
    }
}

for my $divert (keys %added_diversions) {
    my $script = $added_diversions{$divert}{'script'};
    my $line = $added_diversions{$divert}{'line'};

    my $divertrx = $divert;
    $divert = unquote($divert, $expand_diversions);

    if (not exists $added_diversions{$divertrx}{'removed'}) {
        tag 'orphaned-diversion', $divert, $script;
    }

    # Handle man page diversions somewhat specially.  We may divert away a man
    # page in one section without replacing that same file, since we're
    # installing a man page in a different section.  An example is diverting a
    # man page in section 1 and replacing it with one in section 1p (such as
    # libmodule-corelist-perl at the time of this writing).
    #
    # Deal with this by turning all man page diversions into wildcard
    # expressions instead that match everything in the same numeric section so
    # that they'll match the files shipped in the package.
    if ($divertrx =~ m,^(usr\\/share\\/man\\/\S+\\/.*\\\.\d)\w*(\\\.gz\z),) {
        $divertrx = "$1.*$2";
        $expand_diversions = 1;
    }

    if ($expand_diversions) {
        tag 'diversion-for-unknown-file', $divert, "$script:$line"
            unless (grep { $_ =~ m/$divertrx/ } @{$info->sorted_index});
    } else {
        tag 'diversion-for-unknown-file', $divert, "$script:$line"
            unless (exists $info->index->{$divert});
    }
}

}

# -----------------------------------

# Returns non-zero if the given file is not actually a shell script,
# just looks like one.
sub script_is_evil_and_wrong {
    my ($filename) = @_;
    my $ret = 0;
    open (IN, '<', $filename) or fail("cannot open $filename: $!");
    my $i = 0;
    my $var = '0';
    my $backgrounded = 0;
    local $_;
    while (<IN>) {
        chomp;
        next if m/^#/o;
        next if m/^$/o;
        last if (++$i > 55);
        if (m~
            # the exec should either be "eval"ed or a new statement
            (^\s*|\beval\s*[\'\"]|(;|&&|\b(then|else))\s*)

            # eat anything between the exec and $0
            exec\s*.+\s*

            # optionally quoted executable name (via $0)
            .?\$$var.?\s*

            # optional "end of options" indicator
            (--\s*)?

            # Match expressions of the form '${1+$@}', '${1:+"$@"',
            # '"${1+$@', "$@", etc where the quotes (before the dollar
            # sign(s)) are optional and the second (or only if the $1
            # clause is omitted) parameter may be $@ or $*.
            #
            # Finally the whole subexpression may be omitted for scripts
            # which do not pass on their parameters (i.e. after re-execing
            # they take their parameters (and potentially data) from stdin
            .?(\${1:?\+.?)?(\$(\@|\*))?~x) {
            $ret = 1;
            last;
        } elsif (/^\s*(\w+)=\$0;/) {
            $var = $1;
        } elsif (m~
            # Match scripts which use "foo $0 $@ &\nexec true\n"
            # Program name
            \S+\s+

            # As above
            .?\$$var.?\s*
            (--\s*)?
            .?(\${1:?\+.?)?(\$(\@|\*))?.?\s*\&~x) {

            $backgrounded = 1;
        } elsif ($backgrounded and m~
            # the exec should either be "eval"ed or a new statement
            (^\s*|\beval\s*[\'\"]|(;|&&|\b(then|else))\s*)
            exec\s+true(\s|\Z)~x) {

            $ret = 1;
            last;
        }
    }
    close IN;
    return $ret;
}

# Given an interpretor and a file, run the interpretor on that file with the
# -n option to check syntax, discarding output and returning the exit status.
sub check_script_syntax {
    my ($interpreter, $script) = @_;
    my $pid = fork;
    if (!defined $pid) {
        fail("cannot fork: $!");
    } elsif ($pid == 0) {
        open STDOUT, '>', '/dev/null' or fail("cannot reopen stdout: $!");
        open STDERR, '>&STDOUT' or fail("cannot reopen stderr: $!");
        exec $interpreter, '-n', $script
            or fail("cannot exec $interpreter: $!");
    } else {
        waitpid $pid, 0;
    }
    return $?;
}

sub remove_comments {
    local $_;

    my $line = shift || '';
    $_ = $line;

    # Remove quoted strings so we can more easily ignore comments
    # inside them
    s/(^|[^\\](?:\\\\)*)\'(?:\\.|[^\\\'])+\'/$1''/g;
    s/(^|[^\\](?:\\\\)*)\"(?:\\.|[^\\\"])+\"/$1""/g;

    # If the remaining string contains what looks like a comment,
    # eat it. In either case, swap the unmodified script line
    # back in for processing (if required) and return it.
    if (m/(?:^|[^[\\])[\s\&;\(\)](\#.*$)/) {
        $_ = $line;
        s/\Q$1\E//;  # eat comments
    } else {
        $_ = $line;
    }

    return $_;
}

sub unquote($$) {
    my ($string, $replace_regex) = @_;

    $string =~ s,\\,,g;
    if ($replace_regex) {
        $string =~ s,\.\+,*,g;
    }

    return $string;
}

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
