Skip to content
Commits on Source (4)
Summary of important user-visible changes for Bio-DB-NCBIHelper
---------------------------------------------------------------
1.7.6 2019-12-07 16:11:22-06:00 America/Chicago
* Requires Bio::DB::WebDBSeqI v1.7.7
1.7.5 2019-11-29 16:55:33-06:00 America/Chicago
* Explicitly add Bio::SeqIO::entrezgene and LWP::Protocol::https as dependencies
* Actually get '-email' to work which requires overriding get_seq_stream
* Fix remaining delay settings in tests which are causing issues during peak
NCBI times
1.7.4 2019-03-10 21:29:52-05:00 America/Chicago
* Switch away from Bio::Root::Test
* Add '-email' parameter, which allows lower delay interval at NCBI (currently
......
......@@ -30,18 +30,20 @@
},
"runtime" : {
"requires" : {
"Bio::DB::Query::WebQuery" : "0",
"Bio::DB::Query::WebQuery" : "v1.7.7",
"Bio::DB::Taxonomy" : "0",
"Bio::DB::WebDBSeqI" : "0",
"Bio::Root::IO" : "0",
"Bio::Root::Root" : "0",
"Bio::SeqIO" : "0",
"Bio::SeqIO::entrezgene" : "0",
"Bio::Taxon" : "0",
"Bio::WebAgent" : "0",
"CGI" : "0",
"Cache::FileCache" : "0",
"Getopt::Long" : "0",
"HTTP::Request::Common" : "0",
"LWP::Protocol::https" : "0",
"LWP::UserAgent" : "0",
"URI" : "0",
"URI::Escape" : "0",
......@@ -79,7 +81,7 @@
"web" : "https://github.com/bioperl/bio-db-ncbihelper"
}
},
"version" : "1.7.4",
"version" : "1.7.6",
"x_Dist_Zilla" : {
"perl" : {
"version" : "5.028001"
......@@ -294,7 +296,7 @@
"branch" : null,
"changelog" : "Changes",
"signed" : 0,
"tag" : "Bio-DB-NCBIHelper-v1.7.4",
"tag" : "Bio-DB-NCBIHelper-v1.7.6",
"tag_format" : "%N-v%v",
"tag_message" : "%N-v%v"
},
......@@ -309,6 +311,17 @@
"name" : "@BioPerl/Git::Tag",
"version" : "2.045"
},
{
"class" : "Dist::Zilla::Plugin::Prereqs",
"config" : {
"Dist::Zilla::Plugin::Prereqs" : {
"phase" : "runtime",
"type" : "requires"
}
},
"name" : "Prereqs",
"version" : "6.012"
},
{
"class" : "Dist::Zilla::Plugin::FinderCode",
"name" : ":InstallModules",
......
......@@ -24,18 +24,20 @@ meta-spec:
version: '1.4'
name: Bio-DB-NCBIHelper
requires:
Bio::DB::Query::WebQuery: '0'
Bio::DB::Query::WebQuery: v1.7.7
Bio::DB::Taxonomy: '0'
Bio::DB::WebDBSeqI: '0'
Bio::Root::IO: '0'
Bio::Root::Root: '0'
Bio::SeqIO: '0'
Bio::SeqIO::entrezgene: '0'
Bio::Taxon: '0'
Bio::WebAgent: '0'
CGI: '0'
Cache::FileCache: '0'
Getopt::Long: '0'
HTTP::Request::Common: '0'
LWP::Protocol::https: '0'
LWP::UserAgent: '0'
URI: '0'
URI::Escape: '0'
......@@ -49,7 +51,7 @@ resources:
bugtracker: https://github.com/bioperl/bio-db-ncbihelper/issues
homepage: https://metacpan.org/release/Bio-DB-NCBIHelper
repository: git://github.com/bioperl/bio-db-ncbihelper.git
version: 1.7.4
version: 1.7.6
x_Dist_Zilla:
perl:
version: '5.028001'
......@@ -218,7 +220,7 @@ x_Dist_Zilla:
branch: ~
changelog: Changes
signed: 0
tag: Bio-DB-NCBIHelper-v1.7.4
tag: Bio-DB-NCBIHelper-v1.7.6
tag_format: '%N-v%v'
tag_message: '%N-v%v'
Dist::Zilla::Role::Git::Repo:
......@@ -228,6 +230,14 @@ x_Dist_Zilla:
time_zone: local
name: '@BioPerl/Git::Tag'
version: '2.045'
-
class: Dist::Zilla::Plugin::Prereqs
config:
Dist::Zilla::Plugin::Prereqs:
phase: runtime
type: requires
name: Prereqs
version: '6.012'
-
class: Dist::Zilla::Plugin::FinderCode
name: ':InstallModules'
......
......@@ -22,18 +22,20 @@ my %WriteMakefileArgs = (
"MIN_PERL_VERSION" => "5.006",
"NAME" => "Bio::DB::NCBIHelper",
"PREREQ_PM" => {
"Bio::DB::Query::WebQuery" => 0,
"Bio::DB::Query::WebQuery" => "1.7.7",
"Bio::DB::Taxonomy" => 0,
"Bio::DB::WebDBSeqI" => 0,
"Bio::Root::IO" => 0,
"Bio::Root::Root" => 0,
"Bio::SeqIO" => 0,
"Bio::SeqIO::entrezgene" => 0,
"Bio::Taxon" => 0,
"Bio::WebAgent" => 0,
"CGI" => 0,
"Cache::FileCache" => 0,
"Getopt::Long" => 0,
"HTTP::Request::Common" => 0,
"LWP::Protocol::https" => 0,
"LWP::UserAgent" => 0,
"URI" => 0,
"URI::Escape" => 0,
......@@ -53,7 +55,7 @@ my %WriteMakefileArgs = (
"Test::Most" => 0,
"Test::RequiresInternet" => 0
},
"VERSION" => "1.7.4",
"VERSION" => "1.7.6",
"test" => {
"TESTS" => "t/*.t"
}
......@@ -61,12 +63,13 @@ my %WriteMakefileArgs = (
my %FallbackPrereqs = (
"Bio::DB::Query::WebQuery" => 0,
"Bio::DB::Query::WebQuery" => "1.7.7",
"Bio::DB::Taxonomy" => 0,
"Bio::DB::WebDBSeqI" => 0,
"Bio::Root::IO" => 0,
"Bio::Root::Root" => 0,
"Bio::SeqIO" => 0,
"Bio::SeqIO::entrezgene" => 0,
"Bio::Taxon" => 0,
"Bio::WebAgent" => 0,
"CGI" => 0,
......@@ -76,6 +79,7 @@ my %FallbackPrereqs = (
"HTTP::Request::Common" => 0,
"IO::Handle" => 0,
"IPC::Open3" => 0,
"LWP::Protocol::https" => 0,
"LWP::UserAgent" => 0,
"Test::Exception" => 0,
"Test::More" => 0,
......
libbio-db-ncbihelper-perl (1.7.6-1) unstable; urgency=medium
* New upstream version
-- Michael R. Crusoe <michael.crusoe@gmail.com> Sat, 04 Jan 2020 12:44:54 +0100
libbio-db-ncbihelper-perl (1.7.4-1) unstable; urgency=low
* Initial release. (Closes: #940258)
......
name = Bio-DB-NCBIHelper
version = 1.7.4
version = 1.7.6
author = Aaron Mackey <amackey@virginia.edu>
author = Brian Osborne <bosborne@alum.mit.edu>
author = Jason Stajich <jason@bioperl.org>
......@@ -13,3 +13,8 @@ license = Perl_5
-remove = PodWeaver
-remove = Test::EOL
-remove = Test::NoTabs
[Prereqs]
Bio::DB::Query::WebQuery = 1.7.7
Bio::SeqIO::entrezgene = 0
LWP::Protocol::https = 0
......@@ -93,7 +93,7 @@ methods. Internal methods are usually preceded with a _
# Let the code begin...
package Bio::DB::EntrezGene;
$Bio::DB::EntrezGene::VERSION = '1.7.4';
$Bio::DB::EntrezGene::VERSION = '1.7.6';
use strict;
use vars qw($DEFAULTFORMAT $DEFAULTMODE %PARAMSTRING);
......
......@@ -165,7 +165,7 @@ preceded with a _
# Let the code begin...
package Bio::DB::GenBank;
$Bio::DB::GenBank::VERSION = '1.7.4';
$Bio::DB::GenBank::VERSION = '1.7.6';
use strict;
use vars qw(%PARAMSTRING $DEFAULTFORMAT $DEFAULTMODE);
......
......@@ -93,7 +93,7 @@ methods. Internal methods are usually preceded with a _
# Let the code begin...
package Bio::DB::GenPept;
$Bio::DB::GenPept::VERSION = '1.7.4';
$Bio::DB::GenPept::VERSION = '1.7.6';
use strict;
use vars qw($DEFAULTFORMAT $DEFAULTMODE %PARAMSTRING);
......
......@@ -85,7 +85,7 @@ preceded with a _
# Let the code begin...
package Bio::DB::NCBIHelper;
$Bio::DB::NCBIHelper::VERSION = '1.7.4';
$Bio::DB::NCBIHelper::VERSION = '1.7.6';
use strict;
use Bio::DB::Query::GenBank;
......@@ -261,6 +261,7 @@ sub get_request {
'-format' => $format,
'-email' => $email
);
$self->_sleep();
return $self->get_request(%qualifiers);
}
else {
......@@ -269,6 +270,116 @@ sub get_request {
}
}
=head2 get_seq_stream
Title : get_seq_stream
Usage : my $seqio = $self->get_seq_stream(%qualifiers)
Function: builds a url and queries a web db
Returns : a Bio::SeqIO stream capable of producing sequence
Args : %qualifiers = a hash of qualifiers that the implementing class
will process to make a url suitable for web querying
=cut
# Build a request from %qualifiers, fetch it according to the object's
# retrieval type (pipeline, tempfile or io_string) and return a Bio::SeqIO
# stream over the result.
#
# Args    : %qualifiers - passed on to get_request(); a key matching /format/i
#           overrides the default request format.
# Returns : a Bio::SeqIO object reading from a pipe, a temp file or a string.
# Throws  : via $self->throw() when the response is unsuccessful or empty, or
#           when the retrieval type matches none of the supported kinds.
sub get_seq_stream {
    my ($self, %qualifiers) = @_;
    my ($rformat, $ioformat) = $self->request_format();

    # Any qualifier whose name contains "format" overrides the default request
    # format; if none is present the default is passed on as '-format'.
    my $seen = 0;
    foreach my $key ( keys %qualifiers ) {
        if ( $key =~ /format/i ) {
            $rformat = $qualifiers{$key};
            $seen    = 1;
        }
    }
    $qualifiers{'-format'} = $rformat if !$seen;
    ($rformat, $ioformat) = $self->request_format($rformat);

    # These parameters are implemented for Bio::DB::GenBank objects only
    if ( $self->isa('Bio::DB::GenBank') ) {
        $qualifiers{'-seq_start'} = $self->seq_start() if $self->seq_start();
        $qualifiers{'-seq_stop'}  = $self->seq_stop()  if $self->seq_stop();
        $qualifiers{'-strand'}    = $self->strand()    if $self->strand();
        $qualifiers{'-email'}     = $self->email()     if $self->email();

        # complexity is tested with defined() so that a value of 0 is kept
        $qualifiers{'-complexity'} = $self->complexity()
            if defined $self->complexity();
    }

    my $request = $self->get_request(%qualifiers);
    $request->proxy_authorization_basic($self->authentication)
        if ( $self->authentication );
    $self->debug("request is ". $request->as_string(). "\n");

    # workaround for MSWin systems
    $self->retrieval_type('io_string')
        if $self->retrieval_type =~ /pipeline/ && $^O =~ /^MSWin/;

    if ( $self->retrieval_type =~ /pipeline/ ) {
        # Try to create a stream using POSIX fork-and-pipe facility.
        # this is a *big* win when fetching thousands of sequences from
        # a web database because we can return the first entry while
        # transmission is still in progress.
        # Also, no need to keep sequence in memory or in a temporary file.
        # If this fails (Windows, MacOS 9), we fall back to non-pipelined access.

        # fork and pipe: _stream_request()=><STREAM>
        my ($result, $stream) = $self->_open_pipe();

        if ( defined $result ) {
            $DB::fork_TTY = File::Spec->devnull; # prevents complaints from debugger
            if ( !$result ) { # in child process
                $self->_stream_request($request, $stream);
                POSIX::_exit(0); #prevent END blocks from executing in this forked child
            }
            else {
                return Bio::SeqIO->new('-verbose' => $self->verbose,
                                       '-format'  => $ioformat,
                                       '-fh'      => $stream);
            }
        }
        else {
            # No pipe could be opened; continue below with in-memory retrieval.
            $self->retrieval_type('io_string');
        }
    }

    if ( $self->retrieval_type =~ /temp/i ) {
        my $dir = $self->io->tempdir( CLEANUP => 1 );
        my ( $fh, $tmpfile ) = $self->io()->tempfile( DIR => $dir );
        close $fh;
        my $resp = $self->_request($request, $tmpfile);
        if ( ! -e $tmpfile || -z $tmpfile || ! $resp->is_success() ) {
            $self->throw("WebDBSeqI Error - check query sequences!\n");
        }
        $self->postprocess_data('type'     => 'file',
                                'location' => $tmpfile);
        # this may get reset when requesting batch mode
        ($rformat, $ioformat) = $self->request_format();
        if ( $self->verbose > 0 ) {
            open my $ERR, '<', $tmpfile
                or $self->throw("Could not read file '$tmpfile': $!");
            while (<$ERR>) { $self->debug($_); }
            close $ERR;
        }

        return Bio::SeqIO->new('-verbose' => $self->verbose,
                               '-format'  => $ioformat,
                               '-file'    => $tmpfile);
    }

    if ( $self->retrieval_type =~ /io_string/i ) {
        my $resp    = $self->_request($request);
        my $content = $resp->content_ref;
        $self->debug( "content is $$content\n");
        if ( !$resp->is_success() || length($$content) == 0 ) {
            $self->throw("WebDBSeqI Error - check query sequences!\n");
        }
        ($rformat, $ioformat) = $self->request_format();
        $self->postprocess_data('type'     => 'string',
                                'location' => $content);
        $self->debug( "str is $$content\n");

        # Explicit class-method call: the indirect-object form
        # "new IO::String(...)" is parsed by heuristic and can be mistaken
        # for a call to a local new().
        return Bio::SeqIO->new('-verbose' => $self->verbose,
                               '-format'  => $ioformat,
                               '-fh'      => IO::String->new($$content));
    }

    # if we got here, we don't know how to handle the retrieval type
    $self->throw("retrieval type " . $self->retrieval_type .
                 " unsupported\n");
}
=head2 get_Stream_by_batch
......
......@@ -96,7 +96,7 @@ preceded with a _
# Let the code begin...
package Bio::DB::Query::GenBank;
$Bio::DB::Query::GenBank::VERSION = '1.7.4';
$Bio::DB::Query::GenBank::VERSION = '1.7.6';
use strict;
use URI::Escape 'uri_unescape';
use Bio::DB::NCBIHelper;
......@@ -112,11 +112,12 @@ use constant DEFAULT_DB => 'protein';
use constant MAXENTRY => 100;
use vars qw(@ATTRIBUTES);
our $REQUEST_DELAY = 4;
use base qw(Bio::DB::Query::WebQuery);
BEGIN {
@ATTRIBUTES = qw(db reldate mindate maxdate datetype maxids);
@ATTRIBUTES = qw(db reldate mindate maxdate datetype maxids email);
for my $method (@ATTRIBUTES) {
eval <<END;
sub $method {
......@@ -144,6 +145,10 @@ END
-ids array ref of gids (overrides query)
-maxids the maximum number of IDs you wish to collect
(defaults to 100)
-email Email address; required if you want to decrease
delay time between queries
-delay Delay time (in seconds). Note NCBI policy requires 4
seconds between requests unless an email is provided
This method creates a new query object. Typically you will specify a
-db and a -query argument, possibly modified by -mindate, -maxdate, or
......@@ -189,13 +194,15 @@ receive when you generate a SeqIO stream from the query.
sub new {
my $class = shift;
my $self = $class->SUPER::new(@_);
my ($query,$db,$reldate,$mindate,$maxdate,$datetype,$ids,$maxids)
= $self->_rearrange([qw(QUERY DB RELDATE MINDATE MAXDATE DATETYPE IDS MAXIDS)],@_);
my ($query,$db,$reldate,$mindate,$maxdate,$datetype,$ids,$maxids,$email,$delay)
= $self->_rearrange([qw(QUERY DB RELDATE MINDATE MAXDATE DATETYPE IDS
MAXIDS EMAIL DELAY)],@_);
$self->db($db || DEFAULT_DB);
$reldate && $self->reldate($reldate);
$mindate && $self->mindate($mindate);
$maxdate && $self->maxdate($maxdate);
$maxids && $self->maxids($maxids);
$email && $self->email($email);
$datetype ||= 'mdat';
$datetype && $self->datetype($datetype);
$self;
......@@ -222,6 +229,7 @@ sub cookie {
}
else {
$self->_sleep();
$self->_run_query;
@{$self}{qw(_cookie _querynum)};
}
......@@ -248,6 +256,9 @@ sub _request_parameters {
push @params,('term' => $self->query);
# Providing 'retmax' limits queries to 500 sequences ?? I don't think so LS
push @params,('retmax' => $self->maxids || MAXENTRY);
if ($self->email) {
push @params,('email' => $self->email);
}
# And actually, it seems that we need 'retstart' equal to 0 ?? I don't think so LS
# push @params, ('retstart' => 0);
......@@ -276,6 +287,7 @@ sub count {
return $d;
}
else {
$self->_sleep();
$self->_run_query;
return $self->{'_count'};
}
......@@ -357,4 +369,22 @@ sub _generate_id_string {
} @$ids));
}
=head2 delay_policy
Title : delay_policy
Usage : $secs = $self->delay_policy
Function: NCBI requests a delay of 4 seconds between requests unless an email
address is provided. This method implements a 4-second delay; use
'delay()' to override it, but be aware that if no email is provided we
are not responsible for users being IP-blocked by NCBI.
Returns : number of seconds to delay
Args : none
=cut
# Report the number of seconds to wait between requests. The value is always
# the package-level $REQUEST_DELAY; no arguments beyond the invocant are read.
sub delay_policy {
    my ($self) = @_;
    return $REQUEST_DELAY;
}
1;
......@@ -101,7 +101,7 @@ Internal methods are usually preceded with a _
# Let the code begin...
package Bio::DB::Taxonomy::entrez;
$Bio::DB::Taxonomy::entrez::VERSION = '1.7.4';
$Bio::DB::Taxonomy::entrez::VERSION = '1.7.6';
use vars qw($EntrezLocation $UrlParamSeparatorValue %EntrezParams
$EntrezGet $EntrezSummary $EntrezFetch %SequenceParams
$XMLTWIG $DATA_CACHE $RELATIONS);
......@@ -168,7 +168,7 @@ sub _initialize {
$self->SUPER::_initialize(@_);
my ($location,$params) = $self->_rearrange([qw(LOCATION PARAMS)],@_);
my ($location,$params,$email) = $self->_rearrange([qw(LOCATION PARAMS EMAIL)],@_);
if( $params ) {
if( ref($params) !~ /HASH/i ) {
......@@ -178,6 +178,9 @@ sub _initialize {
} else {
$params = \%EntrezParams;
}
if ($email) {
$params->{email} = $email;
}
$self->entrez_params($params);
$self->entrez_url($location || $EntrezLocation );
}
......@@ -593,7 +596,6 @@ sub entrez_params{
return %$f;
}
=head2 Bio::DB::WebBase methods
=head2 proxy_string
......@@ -667,6 +669,7 @@ sub _run_query {
# Given an eutil url, run the eutil query and parse the response into an
# XML Twig object
my ($self, $url) = @_;
$self->sleep();
my $response = $self->get($url);
if ($response->is_success) {
$response = $response->content;
......
......@@ -10,8 +10,18 @@ BEGIN {
use_ok('Bio::DB::EntrezGene');
}
my %params;
if (defined $ENV{BIOPERLEMAIL}) {
$params{'-email'} = $ENV{BIOPERLEMAIL};
$params{'-delay'} = 2;
}
$params{'-verbose'} = $ENV{BIOPERLDEBUG};
my ($gb, $seq, $seqio);
ok $gb = Bio::DB::EntrezGene->new(-retrievaltype => 'tempfile', -delay => 0);
ok $gb = Bio::DB::EntrezGene->new(-retrievaltype => 'tempfile', %params);
#
# Bio::DB::EntrezGene
......
......@@ -21,11 +21,20 @@ my %expected_lengths = (
my ($gb, $seq, $seqio, $seqin);
my %params;
if (defined $ENV{BIOPERLEMAIL}) {
$params{'-email'} = $ENV{BIOPERLEMAIL};
$params{'-delay'} = 2;
}
$params{'-verbose'} = $ENV{BIOPERLDEBUG};
#
# Bio::DB::GenBank
#
ok $gb = Bio::DB::GenBank->new(), 'Bio::DB::GenBank';
ok $gb = Bio::DB::GenBank->new(%params), 'Bio::DB::GenBank';
# get a single seq
SKIP: {
......@@ -48,7 +57,7 @@ $seq = $seqio = undef;
# batch mode
SKIP: {
eval {$seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]);};
skip "Batch access test failed for Genbank. Skipping those tests", 4 if $@;
skip "Batch access test failed for Genbank. Skipping those tests: $@", 4 if $@;
my $done = 0;
while (my $s = $seqio->next_seq) {
is $s->length, $expected_lengths{$s->display_id}, $s->display_id;
......@@ -61,23 +70,24 @@ SKIP: {
$seq = $seqio = undef;
# test the temporary file creation and fasta
ok $gb = Bio::DB::GenBank->new('-format' => 'fasta', '-retrievaltype' => 'tempfile', '-delay' => 0);
ok $gb = Bio::DB::GenBank->new('-format' => 'fasta', '-retrievaltype' => 'tempfile', %params),
"Tempfile tests";
SKIP: {
eval {$seq = $gb->get_Seq_by_id('J00522');};
skip "Couldn't connect to complete GenBank tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 6 if $@;
skip "Couldn't connect to complete GenBank tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests: $@", 6 if $@;
# last part of id holds the key
is $seq->length, $expected_lengths{(split(/\|/,$seq->display_id))[-1]}, $seq->display_id;
is $seq->length, $expected_lengths{(split(/\|/,$seq->display_id))[-1]}, "Check tmpfile: get_Seq_by_id:".$seq->display_id;
eval {$seq = $gb->get_Seq_by_acc('AF303112');};
skip "Couldn't connect to complete GenBank tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 5 if $@;
skip "Couldn't connect to complete GenBank tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests: $@", 5 if $@;
# last part of id holds the key
is $seq->length, $expected_lengths{(split(/\|/,$seq->display_id))[-1]}, $seq->display_id;
is $seq->length, $expected_lengths{(split(/\|/,$seq->display_id))[-1]}, "Check tmpfile: get_Seq_by_acc:".$seq->display_id;
# batch mode requires genbank format
$gb->request_format("gb");
eval {$seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]);};
skip "Couldn't connect to complete GenBank batch tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 4 if $@;
skip "Couldn't connect to complete GenBank batch tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests: $@", 4 if $@;
my $done = 0;
while (my $s = $seqio->next_seq) {
is $s->length, $expected_lengths{$s->display_id};
is $s->length, $expected_lengths{$s->display_id}, "Check tmpfile: get_Stream_by_id:".$s->display_id;
undef $gb; # test the case where the db is gone,
# but a temp file should remain until seqio goes away.
$done++;
......@@ -89,19 +99,19 @@ SKIP: {
$seq = $seqio = undef;
# test pipeline creation
ok $gb = Bio::DB::GenBank->new('-retrievaltype' => 'pipeline', '-delay' => 0);
ok $gb = Bio::DB::GenBank->new('-retrievaltype' => 'pipeline', %params), "Pipeline tests";
SKIP: {
eval {$seq = $gb->get_Seq_by_id('J00522');};
skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 6 if $@;
is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;
skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests: $@", 6 if $@;
is $seq->length, $expected_lengths{$seq->display_id}, "Check pipeline: get_Seq_by_id:".$seq->display_id;
eval {$seq = $gb->get_Seq_by_acc('AF303112');};
skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 5 if $@;
is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;
skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests: $@", 5 if $@;
is $seq->length, $expected_lengths{$seq->display_id}, "Check pipeline: get_Seq_by_acc:".$seq->display_id;
eval {$seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]);};
skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 4 if $@;
skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests: $@", 4 if $@;
my $done = 0;
while (my $s = $seqio->next_seq) {
is $s->length, $expected_lengths{$s->display_id}, $s->display_id;
is $s->length, $expected_lengths{$s->display_id}, "Check pipeline: get_Stream_by_id:".$s->display_id;
undef $gb; # test the case where the db is gone,
# but the pipeline should remain until seqio goes away
$done++;
......@@ -113,32 +123,32 @@ SKIP: {
$seq = $seqio = undef;
# test contig retrieval
ok $gb = Bio::DB::GenBank->new('-delay' => 0, '-format' => 'gbwithparts');
ok $gb = Bio::DB::GenBank->new('-format' => 'gbwithparts', %params);
SKIP: {
eval {$seq = $gb->get_Seq_by_id('JH374761');};
skip "Couldn't connect to GenBank with Bio::DB::GenBank.pm. Skipping those tests", 3 if $@;
is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;
skip "Couldn't connect to GenBank with Bio::DB::GenBank.pm. Skipping those tests: $@", 3 if $@;
is $seq->length, $expected_lengths{$seq->display_id}, "Check contig: get_Seq_by_id:".$seq->display_id;
# now to check that postprocess_data in NCBIHelper catches CONTIG...
ok $gb = Bio::DB::GenBank->new('-delay' => 0, '-format' => 'gb');
ok $gb = Bio::DB::GenBank->new('-format' => 'gb',%params);
eval {$seq = $gb->get_Seq_by_id('JH374761');};
skip "Couldn't connect to GenBank with Bio::DB::GenBank.pm. Skipping those tests", 1 if $@;
is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;
skip "Couldn't connect to GenBank with Bio::DB::GenBank.pm. Skipping those tests: $@", 1 if $@;
is $seq->length, $expected_lengths{$seq->display_id}, "Check contig: get_Seq_by_acc".$seq->display_id;
}
$seq = $seqio = undef;
# bug 1405
my @result;
ok $gb = Bio::DB::GenBank->new(-format => 'Fasta', -seq_start => 2, -seq_stop => 7);
ok $gb = Bio::DB::GenBank->new(-format => 'Fasta', -seq_start => 2, -seq_stop => 7, %params);
SKIP: {
eval {$seq = $gb->get_Seq_by_acc("A11111");};
skip "Couldn't connect to complete GenBank tests. Skipping those tests", 15 if $@;
skip "Couldn't connect to complete GenBank tests. Skipping those tests: $@", 15 if $@;
is $seq->length, 6;
# complexity tests
ok $gb = Bio::DB::GenBank->new(-format => 'fasta', -complexity => 0);
ok $gb = Bio::DB::GenBank->new(-format => 'fasta', -complexity => 0, %params);
eval {$seqin = $gb->get_Stream_by_acc("21614549");};
skip "Couldn't connect to complete GenBank tests. Skipping those tests", 13 if $@;
skip "Couldn't connect to complete GenBank tests. Skipping those tests: $@", 13 if $@;
my @result = (4366, 'dna', 620, 'protein');
# Test number is labile (dependent on remote results)
......@@ -152,9 +162,9 @@ SKIP: {
# Real batch retrieval using epost/efetch
# these tests may change if integrated further into Bio::DB::Gen*
# Currently only useful for retrieving GI's via get_seq_stream
$gb = Bio::DB::GenBank->new();
$gb = Bio::DB::GenBank->new(%params);
eval {$seqin = $gb->get_seq_stream(-uids => [4887706 ,431229, 147460], -mode => 'batch');};
skip "Couldn't connect to complete GenBank batchmode epost/efetch tests. Skipping those tests", 8 if $@;
skip "Couldn't connect to complete GenBank batchmode epost/efetch tests. Skipping those tests: $@", 8 if $@;
my %result = ('M59757' => 12611 ,'X76083'=> 3140, 'J01670'=> 1593);
my $ct = 0;
......
......@@ -16,12 +16,21 @@ my %expected_lengths = (
'2AAA_YEAST' => 635
);
my %params;
if (defined $ENV{BIOPERLEMAIL}) {
$params{'-email'} = $ENV{BIOPERLEMAIL};
$params{'-delay'} = 2;
}
$params{'-verbose'} = $ENV{BIOPERLDEBUG};
my ($gb, $seq, $seqio);
#
# Bio::DB::GenPept
#
ok $gb = Bio::DB::GenPept->new();
ok $gb = Bio::DB::GenPept->new(%params);
SKIP: {
eval {$seqio = $gb->get_seq_stream(-uids => [2981015, 1621261, 195055], -mode => 'batch');};
skip "Couldn't connect to complete GenPept tests. Skipping those tests", 8 if $@;
......@@ -41,7 +50,7 @@ SKIP: {
$seq = $seqio = undef;
ok $gb = Bio::DB::GenPept->new('-delay' => 0);
ok $gb = Bio::DB::GenPept->new(%params);
SKIP: {
eval {$seq = $gb->get_Seq_by_id('195055');};
skip "Couldn't connect to Genbank with Bio::DB::GenPept.pm. Skipping those tests: $@", 10 if $@;
......
......@@ -23,17 +23,27 @@ my %expected_lengths = (
my ($gb, $seq, $seqio, $seqin, $query);
my %params;
if (defined $ENV{BIOPERLEMAIL}) {
$params{'-email'} = $ENV{BIOPERLEMAIL};
$params{'-delay'} = 2;
}
$params{'-verbose'} = $ENV{BIOPERLDEBUG};
# test query facility
ok $query = Bio::DB::Query::GenBank->new('-db' => 'nucleotide',
'-query' => 'Onchocerca volvulus[Organism]',
'-mindate' => '2002/1/1',
'-maxdate' => '2002/12/31'), 'Bio::DB::Query::GenBank';
'-maxdate' => '2002/12/31', %params), 'Bio::DB::Query::GenBank';
SKIP: {
cmp_ok $query->count, '>', 0;
my @ids = $query->ids;
cmp_ok @ids, '>', 0;
is @ids, $query->count;
ok $gb = Bio::DB::GenBank->new('-delay' => 0);
ok $gb = Bio::DB::GenBank->new(%params);
eval {$seqio = $gb->get_Stream_by_query($query);};
skip "Couldn't connect to complete GenBank query tests. Skipping those tests", 5 if $@;
my $done = 0;
......@@ -51,13 +61,14 @@ $seq = $seqio = undef;
# test query facility (again)
ok $query = Bio::DB::Query::GenBank->new('-db' => 'nucleotide',
'-ids' => [qw(J00522 AF303112 2981014)]);
'-ids' => [qw(J00522 AF303112 2981014)],
%params);
SKIP: {
cmp_ok $query->count, '>', 0;
my @ids = $query->ids;
cmp_ok @ids, '>', 0;
is @ids, $query->count;
$gb = Bio::DB::GenBank->new('-delay' => 0);
$gb = Bio::DB::GenBank->new(%params);
eval {$seqio = $gb->get_Stream_by_query($query);};
skip "Couldn't connect to complete GenBank query tests. Skipping those tests: $@", 4 if $@;
my $done = 0;
......@@ -74,5 +85,6 @@ $seq = $seqio = undef;
# and yet again, for bug 2133
$query = Bio::DB::Query::GenBank->new('-query' => 'AF303112',
'-ids' => [qw(J00522 AF303112 2981014)]);
'-ids' => [qw(J00522 AF303112 2981014)],
%params);
is $query->query, 'J00522[PACC]|AF303112[PACC]|2981014[UID]';
......@@ -7,14 +7,21 @@ use Test::RequiresInternet;
use_ok('Bio::DB::Taxonomy');
my %params;
if (defined $ENV{BIOPERLEMAIL}) {
$params{'-email'} = $ENV{BIOPERLEMAIL};
$params{'-delay'} = 1;
}
{
ok my $db = Bio::DB::Taxonomy->new(-source => 'entrez');
ok my $db = Bio::DB::Taxonomy->new(-source => 'entrez', %params);
isa_ok $db, 'Bio::DB::Taxonomy::entrez';
isa_ok $db, 'Bio::DB::Taxonomy';
}
{
my $db = Bio::DB::Taxonomy->new(-source => 'entrez');
my $db = Bio::DB::Taxonomy->new(-source => 'entrez', %params);
my $id;
my $n;
......@@ -106,7 +113,6 @@ use_ok('Bio::DB::Taxonomy');
@ids = $db->get_taxonids('Rhodotorula');
cmp_ok @ids, '>=' , 1;
diag(join(",", @ids));
# From NCBI: Taxid 592558 was merged into taxid 5533 on June 16, 2017
is( (grep { $_ == 592558 } @ids), 0, 'Value no longer found');
ok grep { $_ == 5533 } @ids;
......@@ -115,7 +121,7 @@ use_ok('Bio::DB::Taxonomy');
# we can recursively fetch all descendents of a taxon
{
my $db = Bio::DB::Taxonomy->new(-source=>"entrez");
my $db = Bio::DB::Taxonomy->new(-source=>"entrez", %params);
$db->get_taxon(10090);
my $lca = $db->get_taxon(314146);
......@@ -126,7 +132,7 @@ use_ok('Bio::DB::Taxonomy');
# tests for #182
{
my $db = Bio::DB::Taxonomy->new(-source=>"entrez");
my $db = Bio::DB::Taxonomy->new(-source=>"entrez", %params);
my @taxa = qw(viruses Deltavirus unclassified plasmid);
......@@ -151,7 +157,7 @@ use_ok('Bio::DB::Taxonomy');
# tests for #212
{
my $db = Bio::DB::Taxonomy->new( -source => "entrez" );
my $db = Bio::DB::Taxonomy->new( -source => "entrez", %params);
# String | What I expect | What I get
# ---------------------- | ------------- | ----------
......