# binaries -- lintian check script -*- perl -*-

# Copyright (C) 1998 Christian Schwarz and Richard Braakman
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

package Lintian::binaries;
use strict;
use warnings;

use Util;
use Lintian::Check qw(check_spelling);
use Lintian::Tags qw(tag);
use Lintian::Output qw(debug_msg);

use File::Spec;

# Table based on checks/emdebian's %archdetecttable, as found in
# emdebian-tools.
our %ARCH_REGEX = (
        '32'            => qr'ELF 32-bit',
        '64'            => qr'ELF 64-bit',
        'alpha'         => qr'ELF 64-bit LSB .* Alpha',
        'amd64'         => qr'ELF 64-bit LSB .* x86-64, .* (?:GNU/Linux|(?!GNU))',
        'arm'           => qr'ELF 32-bit LSB .* ARM, version \d,',
        'armeb'         => qr'ELF 32-bit MSB .* ARM',
        'armel'         => qr'ELF 32-bit LSB .* ARM, .* \(SYSV\)',
        'armhf'         => qr'ELF 32-bit LSB .* ARM, .* \(SYSV\)',
#       'avr32'         => qr'ELF 32-bit MSB .* \(SYSV\)',
        'hppa'          => qr'ELF 32-bit MSB .* PA-RISC',
        'hppa64'        => qr'ELF 64-bit MSB .* PA-RISC',
        'hurd-i386'     => qr'ELF 32-bit LSB .* Intel 80386, .* (?:GNU/Hurd|(?!GNU))',
        'i386'          => qr'ELF 32-bit LSB .* 80386, .* (?:GNU/Linux|(?!GNU))',
        'ia64'          => qr'ELF 64-bit LSB .* IA-64',
        'kfreebsd-amd64'=> qr'ELF 64-bit LSB .* x86-64, .* (?:GNU/kFreeBSD|(?!GNU))',
        'kfreebsd-i386' => qr'ELF 32-bit LSB .* 80386, .* (?:GNU/kFreeBSD|(?!GNU))',
        'lpia'          => qr'ELF 32-bit LSB .* 80386, .* (?:GNU/Linux|(?!GNU))',
        'm32r'          => qr'ELF 32-bit MSB .* M32R',
        'm68k'          => qr'ELF 32-bit MSB .* 680[02]0',
        'mips'          => qr'ELF 32-bit MSB .* MIPS',
        'mipsel'        => qr'ELF 32-bit LSB .* MIPS',
#       'mipsn32'       => qr'ELF 32-bit LSB .* MIPS.* N32',
        'mips64'        => qr'ELF 64-bit MSB .* MIPS',
        'mipsel64'      => qr'ELF 64-bit LSB .* MIPS',
        'powerpc'       => qr'ELF 32-bit MSB .* PowerPC',
        'powerpcspe'    => qr'ELF 32-bit MSB .* PowerPC .* cisco 4500',
        'ppc64'         => qr'ELF 64-bit MSB .* PowerPC',
        's390'          => qr'ELF 32-bit MSB .* S.390',
        's390x'         => qr'ELF 64-bit MSB .* S.390',
        'sh4'           => qr'ELF 32-bit LSB .* Renesas SH',
        'sparc'         => qr'ELF 32-bit MSB .* SPARC',
#       'sparcv9b'      => qr'ELF 32-bit MSB .* SPARC.* V8\+',
        'sparc64'       => qr'ELF 64-bit MSB .* SPARC');

our %ARCH_64BIT_EQUIVS = (
        'hppa'          => 'hppa64',
        'i386'          => 'amd64',
        'kfreebsd-i386' => 'kfreebsd-amd64',
        'mips'          => 'mips64',
        'mipsel'        => 'mipsel64',
        'powerpc'       => 'ppc64',
        's390'          => 's390x',
        'sparc'         => 'sparc64',
);

our %EMBEDDED_LIBRARIES = (
        # We exclude version strings starting with "4 " since that's a mark of the
        # Pascal implementation, which is not what this tag is designed to detect.
        # (The "4" is actually the string length (52 characters) in the Pascal
        # counted string format.)
        'zlib'          => {
                            source => qr'(?:zlib|klibc|kfreebsd-kernel-di-\w+)',
                            match => qr'(?m)(?<!4 )(?:in|de)flate (?:\d[ \w.\-]{1,20}[\w.\-])'
                           },

        'bzip2'         => qr'(?m)^This is a bug in bzip2',
        'expat'         => qr'(?m)^requested feature requires XML_DTD support in Expat',
        'file'          => qr'(?m)^could not find any magic files',
        'libxml2'       => qr'root and DTD name do not match',
        'pcre3'         => qr'this version of PCRE is not compiled with PCRE_UTF8 support',
        'tiff'          => {
                            source => qr/tiff(?:\d+)?/,
                            match => qr'No space for PixarLog state block',
                           },
        'ftgl'          => qr'FTGlyphContainer',
        't1lib'         => qr't1lib is copyright \(c\) Rainer Menzner',
        'gl2ps'         => qr'\(C\) 1999-2009 C\. Geuzaine',
        'libgd2'        => qr'gd-(?:png|jpeg:) error:',
        'ncurses'       => qr'Not enough memory to create terminal structure',
        'openssl'       => qr'You need to read the OpenSSL FAQ',
        'sqlite'        => {source => qr'sqlite3?', match => qr'CREATE TABLE sqlite_master\('},
        'libm'          => {source => qr'eglibc', match => qr'neg\*\*non-integral: DOMAIN error'},
        'ltdl'          => {source => qr'libtool', match => qr'(?m)^library already shutdown'},
        'curl'          => qr'A libcurl function was given a bad argument',
        'libmng'        => qr'TERM misplaced during creation of MNG stream',
        'libmsn'        => qr'The MSN server has terminated the connection with an unknown reason code\.',
        'libmikmod'     => qr'APUN \(APlayer\) and UNI \(MikMod\)',
        'libmysqlclient'=> {source => qr'mysql-\d.*', match => qr'MySQL client ran out of memory'},
        'libpng'        => qr'(?m)^Potential overflow in png_zalloc',
        'libjpeg'       => { source => qr'^libjpeg.*',
                             match => qr'(?m)^Caution: quantization tables are too coarse for baseline JPEG'},
        'openjpeg'      => qr'tcd_decode: incomplete bistream',
        'tinyxml'       => qr'Error when TiXmlDocument added to document',
        'libpcap'       => qr'(?:pcap_activate: The "any" device isn\'t supported|corrupted frame on kernel ring mac offset)',
        'glee'          => qr'Extension name exceeds 1023 characters.',
        'glew'          => qr'Missing GL version',
        'libtheora'     => qr'Xiph.Org libtheora ',
);

our $MULTIARCH_DIRS = Lintian::Data->new('binaries/multiarch-dirs', '\s+');

sub run {

my $pkg = shift;
my $type = shift;
my $info = shift;
my $proc = shift;

my $arch;
my $multiarch;
my $madir;
my $gnu_triplet_re;
my $ruby_triplet_re;
my $dynsyms = 0;
my $needs_libc = '';
my $needs_libc_file;
my $needs_libc_count = 0;
my $needs_depends_line = 0;
my $has_perl_lib = 0;
my $has_php_ext = 0;
my $uses_numpy_c_abi = 0;

my %SONAME;

$arch = $info->field('architecture')//'';
$multiarch = $info->field('multi-arch')//'no';
my $srcpkg = $proc->pkg_src()//'';

foreach my $file (sort keys %{$info->objdump_info}) {
    my $objdump = $info->objdump_info->{$file};

    if (defined $objdump->{SONAME}) {
        foreach my $soname (@{$objdump->{SONAME}}) {
            $SONAME{$soname} ||= [];
            push @{$SONAME{$soname}}, $file;
        }
    }
    foreach my $symbol (@{$objdump->{SYMBOLS}}) {
        my ($foo, $sec, $sym) = @{$symbol};
        if ($arch ne 'hppa') {
            if ($foo eq '.text' and $sec eq 'Base' and
                $sym eq '__gmon_start__') {
                tag 'binary-compiled-with-profiling-enabled', $file;
            }
        } else {
            if ( ($sec =~ /^GLIBC_.*/) and ($sym eq '_mcount') ) {
                tag 'binary-compiled-with-profiling-enabled', $file;
            }
        }
    }
    foreach (@{$objdump->{NOTES}}) {
        if ($_ eq 'File format not recognized') {
            tag 'apparently-corrupted-elf-binary', $file;
        } elsif ($_ eq 'File truncated') {
            tag 'apparently-truncated-elf-binary', $file;
        } elsif ($_ eq 'Packed with UPX') {
            tag 'binary-file-compressed-with-upx', $file;
        } elsif ($_ eq 'Invalid operation') {
            tag 'binary-with-bad-dynamic-table', $file unless $file =~ m%^usr/lib/debug/%;
        }
    }
}

# For the package naming check, filter out SONAMEs where all the files are at
# paths other than /lib, /usr/lib, or /usr/X11R6/lib.  This avoids false
# positives with plugins like Apache modules, which may have their own SONAMEs
# but which don't matter for the purposes of this check.  Also filter out
# nsswitch modules
$madir = $MULTIARCH_DIRS->value($arch);

# In the case that the architecture is "all" or unknown (or we do
# not know the multi-arch path for a known architecture) , we assume
# it the multi-arch path to be this (hopefully!) non-existent path to
# avoid warnings about uninitialized variables.
$madir = './!non-existant-path!/./' unless defined $madir;

$gnu_triplet_re = quotemeta $madir;
$gnu_triplet_re =~ s,^i386,i[3-6]86,;
$ruby_triplet_re = $gnu_triplet_re;
$ruby_triplet_re =~ s,linux\\-gnu$,linux,;
$ruby_triplet_re =~ s,linux\\-gnu,linux\\-,;

sub lib_soname_path {
    my ($dir, @paths) = @_;
    foreach my $path (@paths) {
        next if $path =~ m%^(?:usr/)?lib(?:32|64)?/libnss_[^.]+\.so(?:\.[0-9]+)$%;
        return 1 if $path =~ m%^lib/[^/]+$%;
        return 1 if $path =~ m%^usr/lib/[^/]+$%;
        return 1 if defined $dir && $path =~ m%lib/$dir/[^/]++$%;
        return 1 if defined $dir && $path =~ m%usr/lib/$dir/[^/]++$%;
    }
    return 0;
}
my @sonames = sort grep { lib_soname_path($madir, @{$SONAME{$_}}) } keys %SONAME;

# try to identify transition strings
my $base_pkg = $pkg;
$base_pkg =~ s/c102\b//o;
$base_pkg =~ s/c2a?\b//o;
$base_pkg =~ s/\dg$//o;
$base_pkg =~ s/gf$//o;
$base_pkg =~ s/-udeb$//o;
$base_pkg =~ s/^lib64/lib/o;

my $match_found = 0;
foreach my $expected_name (@sonames) {
    $expected_name =~ s/([0-9])\.so\./$1-/;
    $expected_name =~ s/\.so(?:\.|\z)//;
    $expected_name =~ s/_/-/g;

    if ((lc($expected_name) eq $pkg)
        || (lc($expected_name) eq $base_pkg)) {
        $match_found = 1;
        last;
    }
}

tag 'package-name-doesnt-match-sonames', "@sonames"
    if @sonames && !$match_found;

my %directories;
foreach (@{$info->sorted_index}) {
    next unless length $_;
    # create copy, don't modify the index
    my $path = $_;

    my $ftype = $info->index->{$path}->{'type'};
    next unless ($ftype eq 'd' || $ftype eq 'l');
    $path =~ s,/\z,,;
    $directories{"/$path"}++;
}

# If we have an unknown architecture, pretend that all binaries are fine.
if ($arch ne 'all' and not exists($ARCH_REGEX{$arch})) {
    debug_msg(1, "Unknown architecture: $arch");
    $ARCH_REGEX{$arch} = qr/./;
}

# process all files in package
foreach my $file (@{$info->sorted_file_info}) {
    my $fileinfo = $info->file_info->{$file};
    my $objdump = $info->objdump_info->{$file};

    # binary or object file?
    next unless ($fileinfo =~ m/^[^,]*\bELF\b/) or ($fileinfo =~ m/\bcurrent ar archive\b/);

    # Warn about Architecture: all packages that contain shared libraries.
    if ($arch eq 'all') {
        tag 'arch-independent-package-contains-binary-or-object', $file;
    }

    if ($file =~ m,^etc/,) {
        tag 'binary-in-etc', $file;
    }

    if ($file =~ m,^usr/share/,) {
        tag 'arch-dependent-file-in-usr-share', $file;
    }

    if ($multiarch eq 'same') {
        unless ($file =~ m,/(?:$gnu_triplet_re|$ruby_triplet_re|java-\d+-openjdk-\Q$arch\E|\.build-id)/,) {
            tag 'arch-dependent-file-not-in-arch-specific-directory', $file;
        }
    }

    # ELF?
    next unless $fileinfo =~ m/^[^,]*\bELF\b/o;

    if ($arch ne 'all' and $fileinfo !~ m/$ARCH_REGEX{$arch}/) {
        if ($file =~ m,(?:^|/)lib(\d{2})/, or $file =~ m,^emul/ia(\d{2}),) {
            tag 'binary-from-other-architecture', $file
                unless ($fileinfo =~ m/$ARCH_REGEX{$1}/);
        } elsif ($arch eq 'amd64' and $fileinfo =~ m/$ARCH_REGEX{i386}/) {
            # Ignore i386 binaries in amd64 packages for right now.
        } elsif (exists $ARCH_64BIT_EQUIVS{$arch}
            and $fileinfo =~ m/$ARCH_REGEX{$ARCH_64BIT_EQUIVS{$arch}}/
            and $file =~ m,^lib/modules/,) {
            # Allow amd64 kernel modules to be installed on i386.
        } else {
            tag 'binary-from-other-architecture', $file;
        }
    }

    my $strings = slurp_entire_file("strings/$file");
    check_spelling('spelling-error-in-binary', $strings, $file, { $pkg => 1 });

    # stripped?
    if ($fileinfo =~ m,not stripped\s*$,o) {
        # Is it an object file (which generally can not be stripped),
        # a kernel module, debugging symbols, or perhaps a debugging package?
        unless ($file =~ m,\.k?o$, or $pkg =~ m/-dbg$/ or $pkg =~ m/debug/
                or $file =~ m,/lib/debug/, or $file =~ m,\.gox$,o) {
            if ($fileinfo =~ m/executable/
                and $strings =~ m/^Caml1999X0[0-9][0-9]$/m) {
                # Check for OCaml custom executables (#498138)
                tag 'ocaml-custom-executable', $file;
            } else {
                tag 'unstripped-binary-or-object', $file;
            }
        }
    } else {
        # stripped but a debug or profiling library?
        if (($file =~ m,/lib/debug/,o) or ($file =~ m,/lib/profile/,o)) {
            tag 'library-in-debug-or-profile-should-not-be-stripped', $file;
        } else {
            # appropriately stripped, but is it stripped enough?
            if (exists $objdump->{NOTE_SECTION}) {
                tag 'binary-has-unneeded-section', "$file .note";
            }
            if (exists $objdump->{COMMENT_SECTION}) {
                tag 'binary-has-unneeded-section', "$file .comment";
            }
        }
    }

    # rpath is disallowed, except in private directories
    if (exists $objdump->{RPATH}) {
        foreach my $rpath (map {File::Spec->canonpath($_)} keys %{$objdump->{RPATH}}) {
            next if $rpath =~ m,^/usr/lib/(?:$madir/)?(?:games/)?(?:\Q$pkg\E|\Q$srcpkg\E)(?:/|\z),;
            next if $rpath =~ m,^\$\{?ORIGIN\}?,;
            next if $directories{$rpath} and $rpath !~ m,^(?:/usr)?/lib(?:/$madir)?/?\z,;
            tag 'binary-or-shlib-defines-rpath', "$file $rpath";
        }
    }

    while (my ($src, $regex) = each %EMBEDDED_LIBRARIES) {
        if (ref $regex eq 'HASH') {
            next if ($proc->pkg_src =~ m/^$regex->{'source'}$/);

            $regex = $regex->{'match'};
        } elsif ($proc->pkg_src eq $src) {
            next;
        }
        if ($strings =~ /$regex/) {
            tag 'embedded-library', "$file: $src";
        }
    }

    # binary or shared object?
    next unless ($fileinfo =~ m/executable/) or ($fileinfo =~ m/shared object/);
    next if $type eq 'udeb';

    # Perl library?
    if ($file =~ m,^usr/lib/perl5/.*\.so$,) {
        $has_perl_lib = 1;
    }

    # PHP extension?
    if ($file =~ m,^usr/lib/php\d/.*\.so(?:\.\d+)*$,) {
        $has_php_ext = 1;
    }

    # Python extension using Numpy C ABI?
    if ($file =~ m,usr/lib/(?:pyshared/)?python2\.\d+/.*(?<!_d)\.so$,) {
        if ($strings =~ m,module compiled against ABI version %x but this version of numpy is %x,) {
            $uses_numpy_c_abi = 1;
        }
    }

    # Something other than detached debugging symbols in /usr/lib/debug paths.
    if ($file =~ m,^usr/lib/debug/(?:lib\d*|s?bin|usr|opt|dev|emul)/,) {
        if (exists($objdump->{NEEDED})) {
            tag 'debug-file-should-use-detached-symbols', $file;
        }
    }

    # Detached debugging symbols directly in /usr/lib/debug.
    if ($file =~ m,^usr/lib/debug/[^/]+$,) {
        unless (exists($objdump->{NEEDED})
            || $fileinfo =~ m/statically linked/) {
            tag 'debug-symbols-directly-in-usr-lib-debug', $file;
        }
    }

    # statically linked?
    if (!exists($objdump->{NEEDED}) || !defined($objdump->{NEEDED})) {
        if ($fileinfo =~ m/shared object/o) {
            # Some exceptions: detached debugging information and the dynamic
            # loader (which itself has no dependencies).
            next if ($file =~ m%^usr/lib/debug/%);
            next if ($file =~ m%^lib(?:|32|64)/(?:[\w/]+/)?ld-[\d.]+\.so$%);
            tag 'shared-lib-without-dependency-information', $file;
        } else {
            # Some exceptions: files in /boot, /usr/lib/debug/*, named *-static or
            # *.static, or *-static as package-name.
            next if ($file =~ m%^boot/%);
            next if ($file =~ /[\.-]static$/);
            next if ($pkg =~ /-static$/);
            # klibc binaries appear to be static.
            next if ($objdump->{KLIBC});
            # Location of debugging symbols.
            next if ($file =~ m%^usr/lib/debug/%);
            # ldconfig must be static.
            next if ($file eq 'sbin/ldconfig');
            tag 'statically-linked-binary', $file;
        }
    } else {
        my $lib;
        my $no_libc = 1;
        $needs_depends_line = 1;
        for $lib (@{$objdump->{NEEDED}}) {
            if ($lib =~ /^libc\.so\.(\d+.*)/) {
                $needs_libc = "libc$1";
                $needs_libc_file = $file unless $needs_libc_file;
                $needs_libc_count++;
                $no_libc = 0;
            }
        }
        if ($no_libc and not $file =~ m,/libc\b,) {
            if ($fileinfo =~ m/shared object/) {
                tag 'library-not-linked-against-libc', $file;
            } else {
                tag 'program-not-linked-against-libc', $file;
            }
        }
    }
}

# Find the package dependencies, which is used by various checks.
my $depends = '';
if (defined $info->field('pre-depends')) {
    $depends = $info->field('pre-depends');
}
if (defined $info->field('depends')) {
    $depends .= ', ' if $depends;
    $depends .= $info->field('depends');
}
$depends =~ s/\n/ /g;

# Check for a libc dependency.
if ($needs_depends_line) {
    if ($depends && $needs_libc && $pkg !~ /^libc[\d.]+(?:-|\z)/) {
        # Match libcXX or libcXX-*, but not libc3p0.
        my $re = qr/(?:^|,)\s*\Q$needs_libc\E\b/;
        if ($depends !~ /$re/) {
            my $others = '';
            $needs_libc_count--;
            if ($needs_libc_count > 0) {
                $others = " and $needs_libc_count others";
            }
            tag 'missing-dependency-on-libc',
                "needed by $needs_libc_file$others";
        }
    } elsif (!$depends) {
        tag 'missing-depends-line';
    }
}

# Check for a Perl dependency.
if ($has_perl_lib) {
    my $re = qr/(?:^|,)\s*perlapi-[\d.]+(?:\s*\[[^\]]+\])?\s*(?:,|\z)/;
    unless ($depends =~ /$re/) {
        tag 'missing-dependency-on-perlapi';
    }
}

# Check for a phpapi- dependency.
if ($has_php_ext) {
    unless ($depends =~ /(?:^|,)\s*phpapi-[\d\w+]+\s*(?:,|\z)/) {
        tag 'missing-dependency-on-phpapi';
    }
}

# Check for dependency on python-numpy-abiN dependency (or strict versioned
# dependency on python-numpy)
if ($uses_numpy_c_abi and $pkg ) {
    tag 'missing-dependency-on-numpy-abi'
        unless
            $depends =~ m/(?:^|,)\s*python-numpy-abi\d+\s*(?:,|\z)/ or (
            $depends =~ m/(?:^|,)\s*python-numpy\s+\(>[>=]/ and
            $depends =~ m/(?:^|,)\s*python-numpy\s+\(<[<=]/) or
            $pkg eq 'python-numpy';
}

}

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
