Commit 2280c3be authored by Charles Plessy's avatar Charles Plessy

Imported Upstream version 1.02

parents
#!/usr/bin/perl
use strict;
use Module::Build;
my $HeaderFile = "bam.h";
my $LibFile = "libbam.a";
my $ReadLine;
my ($sam_include,$sam_lib) = find_sam(); # may exit with error here
my $build = Module::Build->new(
module_name => 'Bio::SamTools',
dist_version_from => 'lib/Bio/DB/Sam.pm',
dist_author => 'Lincoln Stein <lincoln.stein@gmail.com>',
dist_abstract => 'The GMOD Generic Genome Browser',
license => 'perl',
include_dirs => [$sam_include],
extra_linker_flags => ["-L$sam_lib",'-lbam','-lz'],
extra_compiler_flags=>[
# must match DFLAGS in Samtools Makefile
'-D_IOLIB=2','-D_FILE_OFFSET_BITS=64',
# turn off warnings originating in Perl's Newx* calls
'-Wformat=0'
],
build_requires => {
'ExtUtils::CBuilder' => 0,
},
requires => {
'perl' => '5.008',
},
# create_makefile_pl => 'passthrough',
);
$build->create_build_script;
exit 0;
sub find_sam {
my ($sam_include,$sam_lib);
if (my $samtools = $ENV{SAMTOOLS}) {
$sam_include = $samtools
if -e "$samtools/$HeaderFile";
$sam_lib = $samtools
if -e "$samtools/$LibFile";
}
my @search_path = qw(/ /usr /usr/share /usr/local);
unless ($sam_include) {
for my $p (@search_path) {
$sam_include ||= "$p/include" if
-e "$p/include/$HeaderFile";
}
}
unless ($sam_lib) {
for my $p (@search_path) {
$sam_lib ||= "$p/lib" if
-e "$p/lib/$LibFile";
}
}
unless ($sam_include && $sam_lib) {
print STDOUT "This module requires samtools 0.1.4 or higher (samtools.sourceforge.net).\n";
my $prompt = "Please enter the location of the bam.h and compiled libbam.a files: ";
my $found;
while (!$found) {
my $path = prompt($prompt);
print STDOUT "\n";
last unless $path;
$sam_include = $path
if -e "$path/$HeaderFile";
$sam_lib = $path
if -e "$path/$LibFile";
$found = $sam_include && $sam_lib;
unless ($found) {
print STDOUT "That didn't seem to be right.\n";
$prompt = "Try again, or hit <enter> to cancel: ";
}
}
}
unless ($sam_include && $sam_lib) {
die <<END;
Can\'t find $LibFile and/or $HeaderFile!
If you haven\'t done so already, please compile samtools 0.1.4 or
higher from http://samtools.sourceforge.net and set the SAMTOOLS
environment variable to point to a samtools distribution directory
containing the compiled $LibFile and $HeaderFile files.
END
}
print STDOUT "Found $sam_include/$HeaderFile and $sam_lib/$LibFile.\n";
return ($sam_include,$sam_lib);
}
sub prompt {
my $msg = shift;
unless (defined $ReadLine) {
eval "require Term::ReadLine";
$ReadLine = Term::ReadLine->can('new') || 0;
$ReadLine &&= Term::ReadLine->new(\*STDOUT);
eval {readline::rl_set('TcshCompleteMode','On')};
}
unless ($ReadLine) {
print STDOUT $msg;
chomp (my $in = <>);
return $in;
}
my $in = $ReadLine->readline($msg);
chomp $in;
$in=~ s/\s+//;
$ReadLine->addhistory($in) if $in =~ /\S/;
return $in;
}
Version 1.02
* Fixed bug in parsing of unsigned integer tags that caused some tags to
be treated as signed integer.
Version 1.01
* Fixed Build.PL to pick up correct location of bam header files.
* Eliminated several memory leaks that manifested when reading BAM files
with lots of targets.
* Made -fasta argument optional when using high-level interface.
Version 1.00
* initial release
Tue Jun 23 23:47:28 BST 2009
* Many documentation improvements
* Improved performance of high-level interface for fetch() and pileup() functions.
* Added information to README about how to compile with samtools 0.1.4
Thu May 7 09:47:02 EDT 2009
* See t/01sam.t for a demonstration of the API.
* Essentially all of the API is fleshed out, with the exception of the ability to generate
padded alignment itself.
* The $alignment->query() interface is the one to use for retrieving start, end and sequence
of the query sequence. The $alignment->target() interface flips the meaning of start and
end when the alignment is reversed, to accomodate old AceDB/GFF2 scripts.
The Bio::SamTools package and all associated files are Copyright (c)
2009 Ontario Institute for Cancer Research (OICR).
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself. See the Artistic License file
in the main Perl distribution for specific terms and conditions of
use. In addition, the following disclaimers apply:
OICR makes no representations whatsoever as to the SOFTWARE contained
herein. It is experimental in nature and is provided WITHOUT WARRANTY
OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE OR ANY OTHER
WARRANTY, EXPRESS OR IMPLIED. OICR MAKES NO REPRESENTATION OR
WARRANTY THAT THE USE OF THIS SOFTWARE WILL NOT INFRINGE ANY PATENT OR
OTHER PROPRIETARY RIGHT.
By downloading this SOFTWARE, your Institution hereby indemnifies OICR
against any loss, claim, damage or liability, of whatsoever kind or
nature, which may arise from your Institution's respective use,
handling or storage of the SOFTWARE.
If publications result from research using this SOFTWARE, we ask that
OICR be acknowledged and/or credit be given to OICR scientists, as
scientifically appropriate.
This diff is collapsed.
Build.PL
Changes
DISCLAIMER
lib/Bio/DB/Bam/Alignment.pm
lib/Bio/DB/Bam/AlignWrapper.pm
lib/Bio/DB/Bam/FetchIterator.pm
lib/Bio/DB/Bam/Pileup.pm
lib/Bio/DB/Bam/PileupWrapper.pm
lib/Bio/DB/Bam/Query.pm
lib/Bio/DB/Bam/ReadIterator.pm
lib/Bio/DB/Bam/Target.pm
lib/Bio/DB/Sam.pm
lib/Bio/DB/Sam.xs
lib/Bio/DB/Sam/Constants.pm
lib/Bio/DB/Sam/Segment.pm
LICENSE
MANIFEST This list of files
META.yml
README
t/01sam.t
t/data/00README.txt
t/data/ex1.bam
t/data/ex1.fa
t/data/ex1.sam.gz
typemap Don't delete this!
---
name: Bio-SamTools
version: 1.02
author:
- 'Lincoln Stein <lincoln.stein@gmail.com>'
abstract: The GMOD Generic Genome Browser
license: perl
resources:
license: http://dev.perl.org/licenses/
requires:
perl: 5.008
build_requires:
ExtUtils::CBuilder: 0
provides:
Bio::DB::Bam:
file: lib/Bio/DB/Sam.pm
Bio::DB::Bam::AlignWrapper:
file: lib/Bio/DB/Bam/AlignWrapper.pm
Bio::DB::Bam::Alignment:
file: lib/Bio/DB/Bam/Alignment.pm
Bio::DB::Bam::FetchIterator:
file: lib/Bio/DB/Bam/FetchIterator.pm
Bio::DB::Bam::Pileup:
file: lib/Bio/DB/Bam/Pileup.pm
Bio::DB::Bam::PileupWrapper:
file: lib/Bio/DB/Bam/PileupWrapper.pm
Bio::DB::Bam::Query:
file: lib/Bio/DB/Bam/Query.pm
Bio::DB::Bam::ReadIterator:
file: lib/Bio/DB/Bam/ReadIterator.pm
Bio::DB::Bam::SplitAlignmentPart:
file: lib/Bio/DB/Bam/AlignWrapper.pm
Bio::DB::Bam::Target:
file: lib/Bio/DB/Bam/Target.pm
Bio::DB::Sam:
file: lib/Bio/DB/Sam.pm
version: 1.02
Bio::DB::Sam::Constants:
file: lib/Bio/DB/Sam/Constants.pm
Bio::DB::Sam::Fai:
file: lib/Bio/DB/Sam.pm
Bio::DB::Sam::Segment:
file: lib/Bio/DB/Sam/Segment.pm
Bio::SeqFeature::Coverage:
file: lib/Bio/DB/Sam.pm
generated_by: Module::Build version 0.3
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.2.html
version: 1.2
This is a Perl interface to the SAMtools sequence alignment interface.
See http://samtools.sourceforge.net/.
To compile this module, you must first download, unpack and compile
SAMtools 0.1.4 or higher in some accessible directory. AS OF SAMTOOLS
VERSION 0.1.4, YOU MUST RUN "make -f makefile.generic" IN THE
DISTRIBUTION DIRECTORY IN ORDER TO CREATE THE REQUIRED libbam.a
LIBRARY FILE.
Then set the environment variable SAMTOOLS to point to this directory
and run:
perl Build.PL
./Build
./Build test
(sudo) ./Build install
If you encounter problems during compiling, you may need to edit
Build.PL so that extra_compiler_flags matches the CFLAGS and DFLAGS
settings in the Samtools Makefile. One issue that several people have
encountered is that 32 bit machines must not have -m64 set, and
vice-versa. Others have needed to add -fPIC to the Samtools CFLAGS
line in order for libbam.a to link properly with this module.
AUTHOR: Lincoln D. Stein <lincoln.stein@gmail.com>
Copyright (c) 2009 Ontario Institute for Cancer Research
This package and its accompanying libraries is free software; you can
redistribute it and/or modify it under the terms of the GPL (either
version 1, or at your option, any later version) or the Artistic
License 2.0. Refer to LICENSE for the full license text.
package Bio::DB::Bam::AlignWrapper;
# $Id: AlignWrapper.pm,v 1.7 2009/07/09 18:52:45 lstein Exp $
=head1 NAME
Bio::DB::Bam::AlignWrapper -- Add high-level methods to Bio::DB::Bam::Alignment
=head1 SYNOPSIS
See L<Bio::DB::Bam::Alignment>.
=head1 DESCRIPTION
This is a wrapper around Bio::DB::Bam::Alignment that adds the
following high-level methods. These are described in detail in
L<Bio::DB::Bam::Alignment/High-level Bio::DB::Bam::Alignment methods>.
add_segment() add a new subfeature to split alignments
get_SeqFeatures() fetch subfeatures from split alignments
split_splices() process cigar strings to produce split alignments
expand_flags() return true if flags should be expanded into tags
seq_id() return human-readable reference sequence name
seq() return Bio::PrimarySeq object for reference sequence
subseq() return a subsequence across the indicated range
dna() return the DNA of the reference sequence
attributes() synonym for get_tag_values()
get_all_tags() return all tag names
get_tag_values() return the values of the given tag
has_tag() return true if the given tag is defined
=head1 SEE ALSO
L<Bio::Perl>, L<Bio::DB::Sam>, L<Bio::DB::Bam::Constants>
=head1 AUTHOR
Lincoln Stein E<lt>lincoln.stein@oicr.on.caE<gt>.
E<lt>lincoln.stein@bmail.comE<gt>
Copyright (c) 2009 Ontario Institute for Cancer Research.
This package and its accompanying libraries is free software; you can
redistribute it and/or modify it under the terms of the GPL (either
version 1, or at your option, any later version) or the Artistic
License 2.0. Refer to LICENSE for the full license text. In addition,
please see DISCLAIMER.txt for disclaimers of warranty.
=cut
use strict;
use Bio::DB::Sam::Constants;
our $AUTOLOAD;
use Carp 'croak';
sub new {
my $package = shift;
my ($align,$sam) = @_;
my $self = bless {sam => $sam,
align => $align},ref $package || $package;
$self->add_segment($self->split_splices)
if $sam->split_splices && $align->cigar_str =~ /N/;
return $self;
}
sub AUTOLOAD {
my($pack,$func_name) = $AUTOLOAD=~/(.+)::([^:]+)$/;
return if $func_name eq 'DESTROY';
no strict 'refs';
$_[0] or die "autoload called for non-object symbol $func_name";
croak qq(Can't locate object method "$func_name" via package "$pack")
unless $_[0]->{align}->can($func_name);
*{"${pack}::${func_name}"} = sub { shift->{align}->$func_name(@_) };
shift->$func_name(@_);
}
sub can {
my $self = shift;
return 1 if $self->SUPER::can(@_);
return $self->{align}->can(@_);
}
sub add_segment {
my $self = shift;
my @subfeat = @_;
$self->{segments} ||= [];
push @{$self->{segments}},@subfeat;
}
sub get_SeqFeatures {
my $self = shift;
return unless $self->{segments};
return @{$self->{segments}};
}
sub split_splices {
my $self = shift;
my $cigar = $self->cigar_array;
my @results;
my $start = 0;
my $end = 0;
my $skip = 0;
my $partial_cigar = '';
for my $op (@$cigar,['N',0]) {
my ($operation,$count) = @$op;
if ($operation eq 'N') {
my $s = $self->start + $start + $skip;
my $e = $self->start + $end - 1 + $skip;
my $f = Bio::DB::Bam::SplitAlignmentPart->new(-name => $self->display_name,
-start => $s,
-end => $e,
-seq_id => $self->seq_id,
-strand => +1,
-seq => substr($self->dna,
$start+$skip,
$end-$start),
-type => $self->type);
$f->hit(-name => $self->display_name,
-seq_id => $self->display_name,
-start => $start+1,
-end => $end,
-strand => $self->strand,
-seq => substr($self->qseq,$start,$end-$start),
);
$f->cigar_str($partial_cigar);
$partial_cigar = '';
push @results,$f;
$start += $end-$start;
} else {
$partial_cigar .= "$operation$count";
}
$end += $count if $operation =~ /^[MDSHP]/i;
$skip = $count if $operation eq 'N';
}
return @results;
}
sub expand_flags {
shift->{sam}->expand_flags(@_);
}
sub seq_id {
my $self = shift;
my $tid = $self->tid;
$self->{sam}->target_name($tid);
}
sub abs_ref { shift->seq_id }
sub abs_start { shift->start }
sub abs_end { shift->end }
sub low { shift->start }
sub high { shift->end }
sub type { shift->primary_tag }
sub method { shift->primary_tag }
sub source { return shift->source_tag; }
sub name { shift->qname }
sub class { shift->primary_tag }
sub seq {
my $self = shift;
return Bio::PrimarySeq->new(-seq => $self->dna,
-id => $self->seq_id);
}
sub subseq {
my $self = shift;
my ($start,$end) = @_;
$start = 1 if $start < 1;
$end = $self->high if $end > $self->high;
return Bio::PrimarySeq->new(-seq=>substr($self->dna,
$start-1,
$end-$start+1)
);
}
sub dna {
my $self = shift;
my $region = $self->seq_id.':'.$self->start.'-'.$self->end;
my $fai = $self->{sam}->fai;
return $fai ? $self->{sam}->fai->fetch($region) : 'N' x $self->length;
}
sub tseq {
shift->dna(@_);
}
sub attributes {
my $self = shift;
my $tag = shift;
if (defined $tag) {
return $self->get_tag_values($tag);
} else {
return map {$_=>$self->get_tag_values($_)} $self->get_all_tags;
}
}
sub get_all_tags {
my $self = shift;
return $self->{align}->get_all_tags(@_)
if $self->expand_flags;
return ($self->aux_keys,'FLAGS');
}
sub get_tag_values {
my $self = shift;
my $tag = shift;
defined $tag or return;
return $self->{align}->get_tag_values($tag)
if $self->expand_flags;
if ($tag eq 'FLAGS') {
$self->flag_str;
} else {
$self->aux_get($tag);
}
}
sub has_tag {
my $self = shift;
my $tag = shift;
defined $tag or return;
$self->{align}->get_tag_values($tag)
if $self->expand_flags;
if ($tag eq 'FLAGS') {
return 1;
} else {
my %keys = map {$_=>1} $self->aux_keys;
return exists $keys{uc $tag};
}
}
package Bio::DB::Bam::SplitAlignmentPart;
use base 'Bio::SeqFeature::Lite';
sub hit {
my $self = shift;
my $d = $self->{hit};
$self->{hit} = Bio::SeqFeature::Lite->new(@_) if @_;
return $d;
}
sub Bio::SeqFeature::Lite::subseq {
my $self = shift;
my ($start,$end) = @_;
$start = 1 if $start < 1;
$end = $self->high if $end > $self->high;
return Bio::PrimarySeq->new(-seq=>substr($self->dna,
$start-1,
$end-$start+1)
);
}
sub cigar_str {
my $self = shift;
my $d = $self->{cigar_str};
$self->{cigar_str} = shift if @_;
$d;
}
1;
This diff is collapsed.
package Bio::DB::Bam::FetchIterator;
use strict;
sub new {
my $self = shift;
my $list = shift;
my $total= shift;
$total ||= @$list;
return bless {list=>$list,
total=>$total},ref $self || $self;
}
sub next_seq {
my $self = shift;
return shift @{$self->{list}};
}
sub total {
shift->{total};
}
1;
package Bio::DB::Bam::Pileup;
# $Id: Pileup.pm,v 1.1 2009/06/25 16:15:36 lstein Exp $
# documentation only
1;
=head1 NAME
Bio::DB::Bam::Pileup -- Object passed to pileup() callback
=head1 SYNOPSIS
See L<Bio::DB::Sam/The generic fetch() and pileup() methods> for how
this object is passed to pileup callbacks.
=head1 DESCRIPTION
A Bio::DB::Bam::Pileup object (or a Bio::DB::Bam::PileupWrapper
object) is passed to the callback passed to the Bio::DB::Sam->pileup()
method for each column in a sequence alignment. The only difference
between the two is that the latter returns the more convenient
Bio::DB::Bam::AlignWrapper objects in response to the alignment()
method, at the cost of some performance loss.
=head2 Methods
=over 4
=item $alignment = $pileup->alignment
Return the Bio::DB::Bam::Alignment or Bio::DB::Bam::AlignWrapper
object representing the aligned read.
=item $qpos = $pileup->qpos
Return the position of this aligned column in read coordinates, using
zero-based coordinates.
=item $pos = $pileup->pos
Return the position of this aligned column in read coordinates, using
1-based coordinates.
=item $indel = $pileup->indel
If this column is an indel, return a positive integer for an insertion
relative to the reference, a negative integer for a deletion relative
to the reference, or 0 for no indel at this column.
=item $is_del = $pileup->is_del
True if the base on the padded read is a deletion.
=item $level = $pileup->level
If pileup() or fast_pileup() was invoked with the "keep_level" flag,
then this method will return a positive integer indicating the level
of the read in a printed multiple alignment.
=item $pileup->is_head
=item $pileup->is_tail
These fields are defined in bam.h but their interpretation is obscure.
=back
=head1 SEE ALSO
L<Bio::Perl>, L<Bio::DB::Sam>, L<Bio::DB::Bam::Alignment>, L<Bio::DB::Bam::Constants>
=head1 AUTHOR
Lincoln Stein E<lt>lincoln.stein@oicr.on.caE<gt>.
E<lt>lincoln.stein@bmail.comE<gt>
Copyright (c) 2009 Ontario Institute for Cancer Research.
This package and its accompanying libraries is free software; you can
redistribute it and/or modify it under the terms of the GPL (either
version 1, or at your option, any later version) or the Artistic
License 2.0. Refer to LICENSE for the full license text. In addition,
please see DISCLAIMER.txt for disclaimers of warranty.
=cut
package Bio::DB::Bam::PileupWrapper;
#$Id: PileupWrapper.pm,v 1.2 2009/06/25 16:15:36 lstein Exp $
=head1 NAME
Bio::DB::Bam::PileupWrapper -- Add high-level methods to Bio::DB::Bam::Pileup
=head1 SYNOPSIS
See L<Bio::DB::Sam/The generic fetch() and pileup() methods> for usage of the pileup() method.
=head1 DESCRIPTION
See L<Bio::DB::Bam::Pileup> for documentation of this object's
methods. This class is used by the high-level API to return
Bio::DB::Bam::AlignWrapper objects from the call to alignment() rather
than Bio::DB::Bam::Alignment.
=head1 SEE ALSO
L<Bio::Perl>, L<Bio::DB::Sam>, L<Bio::DB::Bam::Constants>
=head1 AUTHOR
Lincoln Stein E<lt>lincoln.stein@oicr.on.caE<gt>.
E<lt>lincoln.stein@bmail.comE<gt>
Copyright (c) 2009 Ontario Institute for Cancer Research.
This package and its accompanying libraries is free software; you can
redistribute it and/or modify it under the terms of the GPL (either
version 1, or at your option, any later version) or the Artistic
License 2.0. Refer to LICENSE for the full license text. In addition,
please see DISCLAIMER.txt for disclaimers of warranty.
=cut
use strict;
use Bio::DB::Bam::AlignWrapper;
our $AUTOLOAD;
use Carp 'croak';
sub new {
my $package = shift;
my ($align,$sam) = @_;
return bless {sam => $sam,
pileup => $align},ref $package || $package;
}
sub AUTOLOAD {
my($pack,$func_name) = $AUTOLOAD=~/(.+)::([^:]+)$/;
return if $func_name eq 'DESTROY';
no strict 'refs';
$_[0] or die "autoload called for non-object symbol $func_name";
croak qq(Can't locate object method "$func_name" via package "$pack")
unless $_[0]->{pileup}->can($func_name);
*{"${pack}::${func_name}"} = sub { shift->{pileup}->$func_name(@_) };
shift->$func_name(@_);
}
sub can {
my $self = shift;
return 1 if $self->SUPER::can(@_);
return $self->{pileup}->can(@_);
}