# -*- perl -*-
# Lintian::Collect::Package -- interface to data collection for packages
# Copyright (C) 2011 Niels Thykier
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program.  If not, see <http://www.gnu.org/licenses/>.
# This handles the functionality that is common to source and binary packages
package Lintian::Collect::Package;
use strict;
use warnings;
use base 'Lintian::Collect';
use Carp qw(croak);
use Util qw(perm2oct);
# Resolves a path inside the unpacked package tree.
#
# Without an argument this yields the directory the package was unpacked
# into; with $file it yields the path of that entry below the directory
# (see the pod below for the details).
# May croak if the package has not been unpacked.
# sub unpacked Needs-Info unpacked
sub unpacked {
    my $self = shift;
    my ($file) = @_;
    # Both the cache field and the directory name are called "unpacked".
    my @lookup = ('unpacked', 'unpacked', $file);
    return $self->_fetch_extracted_dir(@lookup);
}
# Returns the information from collect/file-info
#
# The result is a hashref mapping each file name (leading "./" and any
# trailing slashes removed) to the description recorded for it in the
# "file-info" file below base_dir.  The hashref is built on the first call
# and cached in $self->{file_info}.
#
# Croaks if the file cannot be opened or contains a line that is not of
# the form "<name>\0 <description>".
sub file_info {
    my ($self) = @_;
    return $self->{file_info} if exists $self->{file_info};
    my $base_dir = $self->base_dir();
    my %file_info;
    # sub file_info Needs-Info file-info
    open(my $idx, '<', "$base_dir/file-info")
      or croak "cannot open $base_dir/file-info: $!";
    # Read into a lexical: "while (<$idx>)" assigns to the global $_ without
    # localising it and would clobber the caller's $_.
    while (my $line = <$idx>) {
        chomp $line;
        # The file name is terminated by a NUL byte; the rest is the info.
        $line =~ m/^(.+?)\x00\s+(.*)$/
          or croak "an error in the file pkg is preventing lintian from checking this package: $line";
        my ($file, $info) = ($1, $2);
        $file =~ s,^\./,,;
        $file =~ s,/+$,,;
        $file_info{$file} = $info;
    }
    close $idx;
    $self->{file_info} = \%file_info;
    return $self->{file_info};
}
# Returns the parsed package index (see the pod below).
#
# The data is read from the "index" file, with the numeric owner ids taken
# from "index-owner-id", and cached under the "index" field.
# FIXME: should maybe return an object
# sub index Needs-Info index
sub index {
    my $self = shift;
    my %source = (
        field     => 'index',
        listing   => 'index',
        owner_ids => 'index-owner-id',
    );
    return $self->_fetch_index_data(@source{qw(field listing owner_ids)});
}
# Returns a reference to the sorted list of file names in the index
# (eqv to [sort keys %{$info->index}]), except that the list is computed
# once and then cached in $self->{sorted_index}.
# sub sorted_index Needs-Info index
sub sorted_index {
    my ($self) = @_;
    unless (exists $self->{sorted_index}) {
        my @names = sort keys %{ $self->index() };
        $self->{sorted_index} = \@names;
    }
    return $self->{sorted_index};
}
# Backing method for unpacked, debfiles and others; this is not a part of the
# API.
#
# Arguments:
#   $field   - key in $self under which the resolved directory is cached
#   $dirname - name of the directory below base_dir
#   $file    - optional path of an entry inside that directory
#
# Returns the directory itself, or "$dir/$file" when $file names something
# other than the directory root.  Leading "/" and "./" components are
# stripped from $file; nothing else about the path is validated.
#
# Croaks if the directory does not exist.
# sub _fetch_extracted_dir Needs-Info <>
sub _fetch_extracted_dir {
    my ($self, $field, $dirname, $file) = @_;
    my $dir = $self->{$field};
    if (not defined $dir) {
        my $base_dir = $self->base_dir;
        $dir = "$base_dir/$dirname";
        croak "$field ($dirname) is not available" unless -d "$dir/";
        $self->{$field} = $dir;
    }
    # Test for length rather than truth so that a file called "0" is not
    # mistaken for "no file given".
    if (defined $file and length($file)) {
        # Strip leading "/" and "./" components only.  A bare leading dot
        # must survive, otherwise ".bashrc" would turn into "bashrc" and
        # "../x" into "./x".
        $file =~ s,^(?:\.?/+)+,,;
        # What is left may be empty or "." - both denote the directory itself.
        return "$dir/$file" if length($file) and $file ne '.';
    }
    return $dir;
}
# Backing method for index and others; this is not a part of the API.
#
# Arguments:
#   $field    - key in $self under which the parsed index is cached
#   $index    - name of the index file below base_dir
#   $indexown - optional name of a parallel index file carrying numeric
#               owner ids; when given, every entry also gets uid and gid
#
# Returns a hashref mapping each file name (without leading "./") to a
# hashref with the keys name, type, operm, owner, group, size, date and
# time, plus link (hardlinks and symlinks), uid/gid (only with $indexown)
# and count (number of entries directly below this one, when non-zero).
#
# Croaks if an index file cannot be opened, if the numeric owner index has
# fewer lines than the main index, or if the two disagree on a file name.
# sub _fetch_index_data Needs-Info <>
sub _fetch_index_data {
    my ($self, $field, $index, $indexown) = @_;
    # The cache lives under $field; $index is only the file name and may
    # differ from it, so it must not be used for the lookup.
    return $self->{$field} if exists $self->{$field};
    my $base_dir = $self->base_dir();
    my (%idxh, %dir_counts);
    my $num_idx;
    # Maps a hardlink target to the names that link to it.
    my %rhlinks;
    open my $idx, '<', "$base_dir/$index"
      or croak "cannot open index file $base_dir/$index: $!";
    if ($indexown) {
        open $num_idx, '<', "$base_dir/$indexown"
          or croak "cannot open index file $base_dir/$indexown: $!";
    }
    # Read into a lexical so that the caller's $_ is left alone.
    while (my $line = <$idx>) {
        chomp $line;
        my (%file, $perm, $owner, $name);
        # Six whitespace separated columns: permissions, owner/group, size,
        # date, time and the name (which may itself contain spaces).
        ($perm, $owner, $file{size}, $file{date}, $file{time}, $name) =
          split(' ', $line, 6);
        $file{operm} = perm2oct($perm);
        $file{type} = substr $perm, 0, 1;
        if ($num_idx) {
            # If we have a "numeric owner" index file, read that as well
            my $numeric = <$num_idx>;
            # Check before chomp: the owner index may be shorter than the
            # main index.
            croak "cannot read index file $indexown" unless defined $numeric;
            chomp $numeric;
            my ($owner_id, $name_chk) = (split(' ', $numeric, 6))[1, 5];
            croak "mismatching contents of index files: $name $name_chk"
              if $name ne $name_chk;
            ($file{uid}, $file{gid}) = split '/', $owner_id, 2;
        }
        ($file{owner}, $file{group}) = split '/', $owner, 2;
        $file{owner} = 'root' if $file{owner} eq '0';
        $file{group} = 'root' if $file{group} eq '0';
        $name =~ s,^\./,,;
        if ($name =~ s/ link to (.*)//) {
            # Hardlink: "<name> link to <target>"
            my $target = $1;
            $target =~ s,^\./,,;
            $file{type} = 'h';
            $file{link} = $target;
            push @{$rhlinks{$target}}, $name;
        } elsif ($file{type} eq 'l') {
            # Symlink: "<name> -> <target>"
            ($name, $file{link}) = split ' -> ', $name, 2;
        }
        $file{name} = $name;
        # count directory contents:
        $dir_counts{$name} ||= 0 if $file{type} eq 'd';
        $dir_counts{$1} = ($dir_counts{$1} || 0) + 1
          if $name =~ m,^(.+/)[^/]+/?$,;
        $idxh{$name} = \%file;
    }
    close $idx;
    close $num_idx if $num_idx;
    foreach my $file (keys %idxh) {
        my $e = $idxh{$file};
        if ($dir_counts{$e->{name}}) {
            $e->{count} = $dir_counts{$e->{name}};
        }
        if ($rhlinks{$e->{name}}) {
            # There is a hard link pointing to this file (or hardlink).
            my %candidates = ();
            my @check = ($e->{name});
            my @sorted;
            my $target;
            while (my $current = pop @check) {
                $candidates{$current} = 1;
                foreach my $rdep (@{$rhlinks{$current}}) {
                    # There should not be any cycles, but just in case
                    push @check, $rdep unless $candidates{$rdep};
                }
                # Remove links we are fixing
                delete $rhlinks{$current};
            }
            # keys %candidates will be a complete list of hardlinks
            # that point (in)directly to $file.  Time to normalize
            # the links.
            #
            # Sort in reverse order (allows pop instead of unshift)
            @sorted = sort { $b cmp $a } keys %candidates;
            # Our preferred target
            $target = pop @sorted;
            foreach my $link (@sorted) {
                # NOTE(review): this test is loop-invariant; it may have
                # been meant to check $link instead - confirm before
                # changing it.
                next unless exists $idxh{$target};
                my $le = $idxh{$link};
                # We may be "demoting" a "real file" to a "hardlink"
                $le->{type} = 'h';
                $le->{link} = $target;
            }
            if ($target ne $e->{name}) {
                $idxh{$target}->{type} = '-';
                # hardlinks do not have a size, so copy that from the
                # original entry.
                $idxh{$target}->{size} = $e->{size};
                delete $idxh{$target}->{link};
            }
        }
    }
    $self->{$field} = \%idxh;
    return $self->{$field};
}
1;
=head1 NAME
Lintian::Collect::Package - Lintian base interface to binary and source package data collection
=head1 SYNOPSIS
my ($name, $type) = ('foobar', 'source');
my $collect = Lintian::Collect->new($name, $type);
my $file;
eval { $file = $collect->unpacked('/bin/ls'); };
if ( $file && -e $file ) {
# work with $file
;
} elsif ($file) {
print "/bin/ls is not available in the Package\n";
} else {
print "Package has not been unpacked\n";
}
=head1 DESCRIPTION
Lintian::Collect::Package provides part of an interface to package
data for source and binary packages. It implements data collection
methods specific to all packages that can be unpacked (or can contain
files)
This module is in its infancy. Most of Lintian still reads all data from
files in the laboratory whenever that data is needed and generates that
data via collect scripts. The goal is to eventually access all data about
source packages via this module so that the module can cache data where
appropriate and possibly retire collect scripts in favor of caching that
data in memory.
=head1 INSTANCE METHODS
=over 4
=item unpacked([$name])
Returns the path to the directory in which the package has been
unpacked. If C<$name> is given, it will return the path to that
specific file (or dir). The method will strip any leading "./" and
"/" from C<$name>, but it will not check if C<$name> actually exists
nor will it check for path traversals.
Caller is responsible for checking the sanity of the path passed to
unpacked and verifying that the returned path points to the expected
file.
The path returned is not guaranteed to be inside the Lintian Lab as
the package may have been unpacked outside the Lab (e.g. as
optimization).
The following code may be helpful in checking for path traversal:
use Cwd qw(realpath);
my $collect = ... ;
my $file = '../../../etc/passwd';
# Append slash to follow symlink if $collect->unpacked returns a symlink
my $uroot = realpath($collect->unpacked() . '/');
my $ufile = realpath($collect->unpacked($file));
if ($ufile =~ m,^\Q$uroot\E(?:/|\z),) {
# has not escaped $uroot
do_stuff($ufile);
} else {
# escaped $uroot
die "Possible path traversal ($file)";
}
Alternatively one can use Util::resolve_pkg_path.
=item file_info
Returns a hashref mapping file names to the output of file(1) for that file.
Note the file names do not have any leading "./" nor "/".
=item index
Returns a hashref to the index information (permissions, file type etc).
Note the file names do not have any leading "./" nor "/".
=item sorted_index
Returns a reference to a sorted list of all file names listed in index (i.e. the sorted keys of the index hashref).
It may contain an "empty" entry denoting the "root dir".
=back
=head1 AUTHOR
Originally written by Niels Thykier for Lintian.
=head1 SEE ALSO
lintian(1), Lintian::Collect(3), Lintian::Collect::Binary(3),
Lintian::Collect::Source(3)
=cut
# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et