Skip to content
Commits on Source (10)
......@@ -15,7 +15,6 @@ META.yml
MYMETA.yml
nytprof.out
pm_to_blib
doc/prokka-*
bug/
db/cm/*.i1*
db/kingdom/*/sprot.p*
......
# Travis CI configuration: installs Perl dependencies and smoke-tests the
# prokka command line.
language: perl
sudo: false
perl:
- "5.26"
install:
# Install the required CPAN modules quietly, skipping their own test suites.
- "cpanm --quiet --notest Time::Piece XML::Simple Digest::MD5 Bio::Perl"
# Put the checkout's bin/ directory first on PATH (presumably where the
# prokka script lives, so the commands below run this checkout's copy).
- "export PATH=$PWD/bin:$PATH"
script:
# Basic invocations that should exit successfully.
- "prokka --version"
- "prokka --help"
# The leading "!" inverts the exit status: this step passes only if prokka
# fails when given an unknown option.
- "! prokka --doesnotexist"
- "prokka --depends"
# Database-related options (exact effects are defined by prokka itself).
- "prokka --setupdb"
- "prokka --listdb"
# Run prokka on the bundled test plasmid with 2 CPUs, writing to asm/.
- "prokka --cpus 2 --outdir asm --prefix asm test/plasmid.fna"
# grep fails if asm/asm.fna is missing or has no FASTA header line.
- "grep '>' asm/asm.fna"
- "prokka --cleandb"
This diff is collapsed.
This diff is collapsed.
#!/usr/bin/env perl
# IN
#>ncbi~~~A7J11_00131~~~A7J11_00131 bifunctional aminoglycoside N-acetyltransferase AAC(3)-Ib/aminoglycoside N-acetyltransferase AAC(6')-Ib''
#ATGAGCATCATTGCAACCGTCAAGATCGGCCCTGACGAAATTTCAGCCATGAGGGCTGTG
#CTCGATCTCTTCGGCAAAGAGTTTGAGGACATTCCAACCTACTCTGATCGCCAGCCGACC
#AATGAGTATCTTGCCAATCTTCTGCACAGCGAGACGTTCATCGCGCTCGCTGCTTTTGAC
# OUT
#>Q92AT0 2.4.1.333~~~~~~1,2-beta-oligoglucan phosphorylase~~~COG3459
#MTMLKEIKKADLSAAFYPSGELAWLKLKDIMLNQVIQNPLENRLSQIYVRAHVGDKIEIYPLLSRDAEVGFNENGVEYRGVVGPFRYSVQMHFHTRGWFYDVTVDGD
use strict;
use warnings;
use Bio::SeqIO;
use Data::Dumper;

# Convert a nucleotide FASTA file whose IDs look like "db~~~gene~~~locus_tag"
# (see the IN example in the header comment) into a protein FASTA stream whose
# description is "~~~gene~~~product~~~" (see the OUT example).
#
# Input : FASTA file(s) named on the command line (read via the ARGV handle).
# Output: translated protein FASTA on STDOUT.
# Dies  : when no input file is given, when a translation contains a stop
#         codon before its last residue, or when the same protein is seen twice.
@ARGV or die "USAGE: $0".
' $(dirname $(which abricate))/../db/ncbi/sequences'.
' > $(dirname $(which prokka))/../db/kingdom/Bacteria/AMR';

my $in  = Bio::SeqIO->new(-fh=>\*ARGV,   -format=>'fasta');
my $out = Bio::SeqIO->new(-fh=>\*STDOUT, -format=>'fasta');

# Protein sequence => number of times it has been encountered so far.
my %seen;

while (my $seq = $in->next_seq) {
    my $id = $seq->id;
    $id = '' unless defined $id;

    # The first field (database name) is discarded.
    my (undef, $gene, $locustag) = split m"~~~", $id;

    # IDs without all three fields leave these undefined; fall back to an
    # empty gene and the untouched ID so no undef reaches 'eq' or the output.
    $gene     = ''  unless defined $gene;
    $locustag = $id unless defined $locustag;

    # A gene name identical to the locus tag carries no information.
    $gene = '' if $gene eq $locustag;

    my $prot    = $seq->translate;
    my $residues = $prot->seq;

    # A '*' followed by anything means a stop codon before the final residue.
    die "ERROR: internal stop codon in translation of '$id'\n", Dumper($prot)
        if $residues =~ m/\*./;

    # Refuse to emit the same protein twice.
    die "ERROR: duplicate protein sequence at '$id'\n", Dumper($prot)
        if $seen{$residues}++;

    my $product = $prot->desc;
    $product = '' unless defined $product;

    $prot->id($locustag);
    # Leading and trailing empty fields yield "~~~gene~~~product~~~".
    $prot->desc( join('~~~', '', $gene, $product, '') );
    $out->write_seq($prot);
}
#!/usr/bin/perl
#!/usr/bin/env perl
use strict;
use warnings;
use Bio::SeqIO;
......
#!/bin/bash
# this now redirects to EBI UK
URL="ftp://ftp.ebi.edu.au/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/"
# where to put formatted database: /path/to/prokka/db/
......@@ -30,6 +31,9 @@ for K in Viruses Archaea Bacteria ; do
"$ROOTDIR/prokka-uniprot_to_fasta_db" | \
makeblastdb -in - -dbtype prot -title "Prokka $K" -hash_index -out "$DIR/sprot"
# If you want a copy of the sprot file too (we do for git distribution)
blastdbcmd -db "$DIR/sprot" -entry all > "$DIR/sprot"
# If you want to use old BLAST:
# formatdb -i stdin -p T -V T -t "Prokka $K" -n "$DIR/sprot"
......
#!/usr/bin/perl -w
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
use Bio::SeqIO;
use Bio::AlignIO;
......@@ -63,8 +64,19 @@ for my $cid (sort keys %desc) {
my $faln = "$tempdir/$cid.FASTA";
# system("esl-reformat stockholm $tempdir/$cid.FASTA | grep -v lcl.consensus > $faln");
print STDERR "$cid | $faln >> $lib.aln\n";
if (not -r $faln) {
print STDERR "WARNING! skipping missing file: $faln\n";
next;
}
my $in = Bio::AlignIO->new(-file=>$faln, -format=>'fasta');
while (my $aln = $in->next_aln) {
# $aln->set_displayname_flat(); # remove bioperl suffix: "/begin-end"
# my %seen = ('lcl|consensus'=>1);
# for my $seq ($aln->each_seq) {
# if ($seen{ $seq->id }++) { # remove duplicates (why do they exist!?)
# $aln->remove_seq($seq);
# }
# }
my $desc = join '~~~', ('', $desc{$cid}{gene}, $desc{$cid}{prod});
print STDERR "$cid: $desc\n";
$aln->id($cid);
......@@ -73,6 +85,8 @@ for my $cid (sort keys %desc) {
# $aln->dblink("CDD:$cid");
$out->write_aln($aln);
}
# system("cat $faln $lib.aln");
# exit(-1);
}
system("hmmbuild $lib.hmm.ascii $lib.aln");
......
#!/usr/bin/perl -w
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
use File::Temp qw(tempdir);
......
#!/usr/bin/perl -w
#!/usr/bin/env perl
use strict;
use warnings;
use Bio::SeqIO;
use Bio::Tools::CodonTable;
use Data::Dumper;
......
#!/usr/bin/perl -w
#!/usr/bin/env perl
use strict;
use warnings;
use Bio::SeqIO;
my(@Options, $verbose, $format, $hypo, $idtag, $sep, $blank, $pseudo, $minlen);
......
#!/usr/bin/perl
#!/usr/bin/env perl
use strict;
use warnings;
use Bio::SeqIO;
......@@ -21,7 +21,7 @@ while (<$ali_fh>) {
my @x = split m/\t/;
next if $x[5] =~ m/UPF\d+|homolog|[@<\[]/;
$x[5] =~ s/^probable\s+//gi;
$DESC{ $x[1] } = "$sep$x[4]$sep$x[5]";
$DESC{ $x[0] } = "$sep$x[4]$sep$x[5]";
}
printf STDERR "Accepted descriptions for %d families\n", scalar keys %DESC;
......@@ -43,7 +43,7 @@ while (<$fam_fh>) {
elsif (m/^AC\s+(\w+)/) {
$AC = $1;
}
elsif (m/EC=([\d.]+);/) {
elsif (m/EC=([\d.-]+);/) {
$EC = $1;
}
elsif (m/^ Bacteria/) {
......
#!/usr/bin/perl -w
#!/usr/bin/env perl
use strict;
use warnings;
use SWISS::Entry;
use SWISS::KW;
#use SWISS::OC;
......@@ -54,8 +55,19 @@ while (<ARGV>)
$gene = '' if $gene =~ m/\d{2}/ or $gene =~ m/\./;
my $ec = '';
# my $prod = 'hypothetical protein';
my $prod = '';
my $cog = '';
# Scan the entry's DR (database cross-reference) records for a COG identifier
# and store it in $cog. "if (1)" is an always-true switch around this step.
if (1) {
# Example DR record: [ 'eggNOG', 'COG4799', 'LUCA' ]
for my $dr ( @{ $entry->DRs->list } ) {
# print Dumper($dr);
# Only the second field is examined; it must be exactly "COG" plus digits.
if ($dr->[1] =~ m/^(COG\d+)$/) {
$cog = $1;
# Keep the first matching COG and stop scanning.
last;
}
}
}
if (1) {
for my $de ($entry->DEs->elements) {
......@@ -67,6 +79,7 @@ while (<ARGV>)
elsif ($de->type eq 'Full' and $de->category eq 'RecName') {
$prod = $de->text;
if ($prod =~ m/^UPF\d|^Uncharacterized protein|^ORF|^Protein /) {
next if ! $hypo;
$prod = $HYPO;
}
}
......@@ -83,8 +96,8 @@ while (<ARGV>)
$gene ||= $blank;
$prod ||= $blank;
print STDERR join("\t", $entry->AC, $ec, $gene, $prod), "\n" if $verbose;
print ">", $entry->AC, " $ec$sep$gene$sep$prod\n", $entry->SQs->seq, "\n";
print STDERR join("\t", $entry->AC, $ec, $gene, $prod, $cog), "\n" if $verbose;
print ">", $entry->AC, " $ec$sep$gene$sep$prod$sep$cog\n", $entry->SQs->seq, "\n";
$out++;
......@@ -106,7 +119,7 @@ sub setOptions {
{OPT=>"evidence=i", VAR=>\$evlev, DEFAULT=>2, DESC=>"1=prot 2=mrna 3=homol 4=pred 5=unsure"},
{OPT=>"fragments!", VAR=>\$frag, DEFAULT=>0, DESC=>"Include 'DE Flags: Fragment;' entries"},
{OPT=>"minlen=i", VAR=>\$minlen, DEFAULT=>20, DESC=>"Minimum peptide length"},
{OPT=>"maxlen=i", VAR=>\$maxlen, DEFAULT=>1E5, DESC=>"Minimum peptide length"},
{OPT=>"maxlen=i", VAR=>\$maxlen, DEFAULT=>1E5, DESC=>"Maximum peptide length"},
{OPT=>"term=s", VAR=>\$term, DEFAULT=>'', DESC=>"Lineage must contain this term eg. 'Bacteria'"},
{OPT=>"hypo!", VAR=>\$hypo, DEFAULT=>0, DESC=>"Don't filter out hypothetical proteins"},
);
......
No preview for this file type
This diff is collapsed.
No preview for this file type
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.