Skip to content
Commits on Source (7)
This pull request addresses issue #...
I hereby agree to dual licence this and any previous contributions under both
<!--- Please read each of the following items and confirm by replacing
!--the [ ] with a [X] --->
- [ ] I hereby agree to dual licence this and any previous contributions under both
the _Biopython License Agreement_ **AND** the _BSD 3-Clause License_.
I have read the ``CONTRIBUTING.rst`` file and understand that AppVeyor and
- [ ] I have read the ``CONTRIBUTING.rst`` file and understand that AppVeyor and
TravisCI will be used to confirm the Biopython unit tests and ``flake8`` style
checks pass with these changes.
I have added my name to the alphabetical contributors listings in the files
- [ ] I have added my name to the alphabetical contributors listings in the files
``NEWS.rst`` and ``CONTRIB.rst`` as part of this pull request, am listed
already, or do not wish to be listed. (*This acknowledgement is optional.*)
......@@ -53,10 +53,12 @@ Tests/biosql.ini
#TODO - The unit tests shouldn't leave temp files after running:
Tests/BioSQL/temp_sqlite.db
Tests/BioSQL/temp_sqlite.db-journal
Tests/Cluster/cyano_result*
#TODO - The Tutorial doctests shouldn't leave example files after
#running Tests/test_Tutorial.py
Doc/examples/other_trees.nwk
Doc/examples/tree1.nwk
#Ignore LaTeX temp files, and compiled output
Doc/*.aux
......
......@@ -33,6 +33,7 @@ envlist =
style
sdist
bdist_wheel
api
{py27,py34,py35,py36,pypy,pypy3}-cover
{py27,py34,py35,py36,pypy,pypy3}-nocov
......@@ -46,6 +47,7 @@ passenv =
TRAVIS_*
TOXENV
CODECOV_*
HOME_4_TCOFFEE
whitelist_externals =
bash
echo
......@@ -57,18 +59,18 @@ deps =
#Leaving py34 without any soft dependencies (just numpy)
cover: coverage
cover: codecov
{py27}: unittest2
{py27}: mysql-python
{py27,py36}: mmtf-python
{py27,py35}: reportlab
{py27,py34,py35,py36}: psycopg2-binary
{py27,py34,py35,py35}: mysql-connector-python-rf
{py27,py35,pypy}: rdflib
{pypy,pypy3}: numpy==1.12.1
{py27,py34,py36}: numpy
{py36}: scipy
{py27}: networkx
{py36}: matplotlib
py27: unittest2
py27: mysql-python
py27,py36: mmtf-python
py27,py35: reportlab
py27,py34,py35,py36: psycopg2-binary
py27,py34,py35,py35: mysql-connector-python-rf
py27,py35,pypy: rdflib
pypy,pypy3: numpy==1.12.1
py27,py34,py36: numpy
py36: scipy
py27: networkx
py36: matplotlib
commands =
#The bash call is a work around for special characters
#The /dev/null is to hide the verbose output but leave warnings
......@@ -85,6 +87,7 @@ commands =
skip_install = True
whitelist_externals =
flake8
doc8
rst-lint
bash
deps =
......@@ -92,7 +95,9 @@ deps =
flake8-docstrings
flake8-blind-except
flake8-rst-docstrings
py34,py35,py36: flake8-bugbear
restructuredtext_lint
doc8
commands =
flake8 --max-line-length 82 setup.py
# These folders each have their own .flake8 file:
......@@ -108,6 +113,8 @@ commands =
bash -c \'grep "^- " CONTRIB.rst | LC_ALL=C sort -u -c -f\'
# Check copyright date
bash -c \'grep "1999-`date +'%Y'`" LICENSE.rst\'
# Would like to tell doc8 to just check *.rst but does *.txt too:
bash -c "doc8 --ignore-path 'Doc/examples/ec_*.txt' *.rst Doc/"
# Check no __docformat__ lines
bash -c "if grep --include '*.py' -rn '^__docformat__ ' Bio BioSQL Tests Scripts Doc ; then echo 'Remove __docformat__ line(s), we assume restructuredtext.'; false; fi"
# Check DOI link style, see https://www.crossref.org/display-guidelines/
......@@ -129,3 +136,24 @@ deps =
numpy
commands =
python setup.py bdist_wheel
[testenv:api]
# Note Sphinx likes to have the code installed so can import it
skip_install = False
whitelist_externals =
bash
sphinx-apidoc
make
deps =
mmtf-python
mysql-connector-python-rf
numpy
rdflib
reportlab
scipy
sphinx>=1.8.0
numpydoc
commands =
bash -c \'python setup.py install > /dev/null\'
bash -c \'mkdir -p Doc/api/_templates Doc/api/_static Doc/api/_build\'
make -C Doc/api/ html
......@@ -38,6 +38,13 @@ matrix:
apt:
packages:
before_install: echo "Going to run basic checks"
- stage: test
python: 2.7
env: TOXENV=api
addons:
apt:
packages:
before_install: echo "Going to build API docs"
- stage: test
python: 2.7
env: TOXENV=py27-cover
......@@ -72,6 +79,8 @@ addons:
- probcons
- samtools
- wise
- t-coffee
- ncbi-blast+
# We setup $HOME/bin and add it to the $PATH for extra binaries we're using.
#
......@@ -99,6 +108,9 @@ before_install:
- curl -L -O https://anaconda.org/bioconda/genepop/4.5.1/download/linux-64/genepop-4.5.1-0.tar.bz2
# This will create ./bin/Genepop and a harmless ./info/ folder.
- tar -jxvf genepop-4.5.1-0.tar.bz2
# Setup environment for t-coffee
- mkdir -p $HOME/tcoffee_temp
- export HOME_4_TCOFFEE=$HOME/tcoffee_temp
# There are TravisCI provided versions of PyPy and PyPy3, but currently too old.
# We therefore deactivate that, and download and unzip portable PyPy binaries.
- |
......@@ -140,7 +152,7 @@ install:
- tox -c .travis-tox.ini -e $TOXENV --notest
script:
- travis_wait tox -c .travis-tox.ini -e $TOXENV
- travis_wait 30 tox -c .travis-tox.ini -e $TOXENV
notifications:
email: false
......@@ -5,11 +5,10 @@ ignore =
# =======================
# flake: E###, F###, W###
# =======================
# pycodestyle v2.3.1 default ignore is E121,E123,E126,E226,E24,E704,W503
# flake8 v3.3.0 default ignore is E121,E123,E126,E226,E24,E704,W503,W504
#
# pycodestyle v2.4.0 default ignore is E121,E123,E126,E226,E24,E704,W503,W504
# flake8 v3.6.0 default ignore is E121,E123,E126,E226,E24,E704,W503,W504
# These are ignored by default:
E122,E123,E126,W503,
E122,E123,E126,W503,W504,
# These are not ignored by default:
# E127 continuation line over-indented for visual indent
# E128 continuation line under-indented for visual indent
......@@ -18,7 +17,7 @@ ignore =
# F401 module imported but unused
# F841 local variable name is assigned to but never used
# TODO: Fix some of these?
E127,E128,E501,E731,F401,F841,
E501,E731,F401,F841,
# =====================================
# pydocstyle: D1## - Missing Docstrings
# =====================================
......@@ -36,6 +35,12 @@ ignore =
# D211 No blank lines allowed before class docstring
# We ignore D203 deliberately in favour of passing D211,
D203,
# ====================
# flake8-bugbear: B###
# ====================
# B007 Loop control variable not used within the loop body.
# If this is intended, start the name with an underscore.
B007,
# ================================================
# flake8-commas: C#### (in case installed locally)
# ================================================
......
# Copyright 2004 by Harry Zuzan. All rights reserved.
# Copyright 2016 by Adam Kurkiewicz. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Reading information from Affymetrix CEL files version 3 and 4."""
......
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Deal with Affymetrix related data such as cel files."""
......@@ -278,7 +278,7 @@ class SummaryInfo(object):
def _pair_replacement(self, seq1, seq2, weight1, weight2,
start_dict, ignore_chars):
"""Compare two sequences and generate info on the replacements seen.
"""Compare two sequences and generate info on the replacements seen (PRIVATE).
Arguments:
- seq1, seq2 - The two sequences to compare.
......@@ -549,7 +549,7 @@ class SummaryInfo(object):
def _get_letter_freqs(self, residue_num, all_records, letters, to_ignore,
pseudo_count=0, e_freq_table=None, random_expected=None):
"""Determine the frequency of specific letters in the alignment.
"""Determine the frequency of specific letters in the alignment (PRIVATE).
Arguments:
- residue_num - The number of the column we are getting frequencies
......@@ -632,7 +632,7 @@ class SummaryInfo(object):
def _get_column_info_content(self, obs_freq, e_freq_table, log_base,
random_expected):
"""Calculate the information content for a column.
"""Calculate the information content for a column (PRIVATE).
Arguments:
- obs_freq - The frequencies observed for each letter in the column.
......
......@@ -44,8 +44,7 @@ class MSAProbsCommandline(AbstractCommandline):
def __init__(self, cmd="msaprobs", **kwargs):
"""Initialize the class."""
# order of parameters is the same as in msaprobs -help
self.parameters = \
[
self.parameters = [
_Option(["-o", "--outfile", "outfile"],
"specify the output file name (STDOUT by default)",
filename=True,
......
......@@ -19,7 +19,14 @@ from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord, _RestrictedDict
from Bio import Alphabet
try:
from Bio.Align import _aligners
except ImportError as e:
new_exc = ImportError("{}: you should not import directly from the "
"biopython source directory; please exit the source "
"tree and re-launch your code from there".format(e))
new_exc.__cause__ = None
raise new_exc
class MultipleSeqAlignment(object):
......@@ -149,28 +156,7 @@ class MultipleSeqAlignment(object):
{'tool': 'demo'}
>>> align.column_annotations
{'stats': 'CCCXCCC'}
NOTE - The older Bio.Align.Generic.Alignment class only accepted a
single argument, an alphabet. This is still supported via a backwards
compatible "hack" so as not to disrupt existing scripts and users, but
is deprecated and will be removed in a future release.
"""
if isinstance(records, (Alphabet.Alphabet, Alphabet.AlphabetEncoder)):
if alphabet is None:
# TODO - Remove this backwards compatible mode!
alphabet = records
records = []
import warnings
from Bio import BiopythonDeprecationWarning
warnings.warn("Invalid records argument: While the old "
"Bio.Align.Generic.Alignment class only "
"accepted a single argument (the alphabet), the "
"newer Bio.Align.MultipleSeqAlignment class "
"expects a list/iterator of SeqRecord objects "
"(which can be an empty list) and an optional "
"alphabet argument", BiopythonDeprecationWarning)
else:
raise ValueError("Invalid records argument")
if alphabet is not None:
if not isinstance(alphabet, (Alphabet.Alphabet, Alphabet.AlphabetEncoder)):
raise ValueError("Invalid alphabet argument")
......
This diff is collapsed.
......@@ -51,10 +51,9 @@ def _extract_alignment_region(alignment_seq_with_flanking, annotation):
end = int(annotation['al_stop']) - display_start + 1
else:
# FASTA has flipped this sequence...
start = display_start \
- int(annotation['al_start'])
end = display_start \
- int(annotation['al_stop']) + 1
start = display_start - int(annotation['al_start'])
end = display_start - int(annotation['al_stop']) + 1
end += align_stripped.count("-")
if start < 0 or start >= end or end > len(align_stripped):
raise ValueError("Problem with sequence start/stop,\n%s[%i:%i]\n%s"
......
......@@ -80,8 +80,8 @@ from .Interfaces import AlignmentIterator
from .Interfaces import SequentialAlignmentWriter
XMFA_HEADER_REGEX = re.compile("> (?P<id>\d+):(?P<start>\d+)-(?P<end>\d+) (?P<strand>[+-]) (?P<name>.*)")
XMFA_HEADER_REGEX_BIOPYTHON = re.compile("> (?P<id>\d+):(?P<start>\d+)-(?P<end>\d+) (?P<strand>[+-]) (?P<name>[^#]*) # (?P<realname>.*)")
XMFA_HEADER_REGEX = re.compile(r"> (?P<id>\d+):(?P<start>\d+)-(?P<end>\d+) (?P<strand>[+-]) (?P<name>.*)")
XMFA_HEADER_REGEX_BIOPYTHON = re.compile(r"> (?P<id>\d+):(?P<start>\d+)-(?P<end>\d+) (?P<strand>[+-]) (?P<name>[^#]*) # (?P<realname>.*)")
ID_LINE_FMT = "> {seq_name}:{start}-{end} {strand} {file} # {ugly_hack}\n"
......
......@@ -98,14 +98,7 @@ class PhylipWriter(SequentialAlignmentWriter):
Note that Tab characters count as only one character in the
species names. Their inclusion can cause trouble.
"""
name = record.id.strip()
# Either remove the banned characters, or map them to something
# else like an underscore "_" or pipe "|" character...
for char in "[](),":
name = name.replace(char, "")
for char in ":;":
name = name.replace(char, "|")
name = name[:id_width]
name = sanitize_name(record.id, id_width)
if name in names:
raise ValueError("Repeated name %r (originally %r), "
"possibly due to truncation"
......@@ -329,14 +322,9 @@ class SequentialPhylipWriter(SequentialAlignmentWriter):
# Apply this test *after* cleaning the identifiers
names = []
for record in alignment:
name = record.id.strip()
# Either remove the banned characters, or map them to something
# else like an underscore "_" or pipe "|" character...
for char in "[](),":
name = name.replace(char, "")
for char in ":;":
name = name.replace(char, "|")
name = name[:id_width]
name = sanitize_name(record.id, id_width)
if name in names:
raise ValueError("Repeated name %r (originally %r), "
"possibly due to truncation"
......@@ -441,3 +429,19 @@ class SequentialPhylipIterator(PhylipIterator):
id=i, name=i, description=i)
for (i, s) in zip(ids, seqs))
return MultipleSeqAlignment(records, self.alphabet)
def sanitize_name(name, width=None):
    """Sanitise sequence identifier for output.

    Leading and trailing whitespace is stripped, the banned characters
    "[]()," (square brackets, round brackets and the comma) are removed,
    and the characters ":;" are each replaced with "|".

    Arguments:
     - name - The identifier string to clean up.
     - width - Optional maximum length. If given (not None), the cleaned
       name is truncated to this many characters; truncation is applied
       *after* the character clean-up.

    Returns the cleaned (and possibly truncated) name as a string.
    """
    cleaned = name.strip()
    # Plain str.replace calls are used (rather than str.translate) so the
    # same code behaves identically for Python 2 and Python 3 strings.
    for banned in "[](),":
        cleaned = cleaned.replace(banned, "")
    for separator in ":;":
        cleaned = cleaned.replace(separator, "|")
    return cleaned if width is None else cleaned[:width]
......@@ -490,13 +490,12 @@ class AbstractCommandline(object):
# Using universal newlines is important on Python 3, this
# gives unicode handles rather than bytes handles.
# Windows 7, 8 and 8.1 want shell = True
# TODO: Test under Windows 10 and revisit platform detection.
# Windows 7, 8, 8.1 and 10 want shell = True
if sys.platform != "win32":
use_shell = True
else:
win_ver = platform.win32_ver()[0]
if win_ver in ["7", "8", "post2012Server"]:
if win_ver in ["7", "8", "post2012Server", "10"]:
use_shell = True
else:
use_shell = False
......
......@@ -19,6 +19,7 @@ Wrappers for the new NCBI BLAST+ tools (written in C++):
- NcbirpstblastnCommandline - Translated Reverse Position Specific BLAST
- NcbideltablastCommandline - Protein-Protein domain enhanced lookup time accelerated blast
- NcbiblastformatterCommandline - Convert ASN.1 to other BLAST output formats
- NcbimakeblastdbCommandline - Application to create BLAST databases
For further details, see:
......@@ -1234,8 +1235,146 @@ class NcbideltablastCommandline(_Ncbiblast2SeqCommandline):
_Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
class NcbimakeblastdbCommandline(AbstractCommandline):
    """Wrapper for the NCBI BLAST+ program makeblastdb.

    This is a wrapper for the NCBI BLAST+ makeblastdb application
    to create BLAST databases. By default, this creates a blast database
    with the same name as the input file. The default output location
    is the same directory as the input.

    >>> from Bio.Blast.Applications import NcbimakeblastdbCommandline
    >>> cline = NcbimakeblastdbCommandline(dbtype="prot",
    ...                                    input_file="NC_005816.faa")
    >>> cline
    NcbimakeblastdbCommandline(cmd='makeblastdb', dbtype='prot', input_file='NC_005816.faa')
    >>> print(cline)
    makeblastdb -dbtype prot -in NC_005816.faa

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="makeblastdb", **kwargs):
        """Initialize the class."""
        self.parameters = [
            # Basic input options
            _Switch(["-h", "h"],
                    "Print USAGE and DESCRIPTION; ignore other arguments."),
            _Switch(["-help", "help"],
                    "Print USAGE, DESCRIPTION and ARGUMENTS description; "
                    "ignore other arguments."),
            _Switch(["-version", "version"],
                    "Print version number; ignore other arguments."),
            # Output configuration options
            # NOTE(review): the help text below mentions an "alignment",
            # which looks copied from the search wrappers - confirm the
            # intended wording for makeblastdb before changing it.
            _Option(["-out", "out"],
                    "Output file for alignment.",
                    filename=True,
                    equate=False),
            # makeblastdb specific options
            _Option(["-dbtype", "dbtype"],
                    "Molecule type of target db ('nucl' or 'prot')",
                    equate=False,
                    is_required=True,
                    checker_function=lambda x: x == 'nucl' or x == 'prot'),
            _Option(["-in", "input_file"],
                    "Input file/database name",
                    filename=True,
                    equate=False),
            _Option(["-input_type", "input_type"],
                    "Type of the data specified in input_file. "
                    "Default = 'fasta'. Added in BLAST 2.2.26.",
                    filename=False,
                    equate=False,
                    checker_function=self._input_type_checker),
            _Option(["-title", "title"],
                    "Title for BLAST database",
                    filename=False,
                    equate=False),
            _Switch(["-parse_seqids", "parse_seqids"],
                    "Option to parse seqid for FASTA input if set, for all "
                    "other input types seqids are parsed automatically"),
            _Switch(["-hash_index", "hash_index"],
                    "Create index of sequence hash values."),
            _Option(["-mask_data", "mask_data"],
                    "Comma-separated list of input files containing masking "
                    "data as produced by NCBI masking applications "
                    "(e.g. dustmasker, segmasker, windowmasker)",
                    filename=True,
                    equate=False),
            _Option(["-mask_id", "mask_id"],
                    "Comma-separated list of strings to uniquely identify the "
                    "masking algorithm",
                    filename=False,
                    equate=False),
            _Option(["-mask_desc", "mask_desc"],
                    "Comma-separated list of free form strings to describe "
                    "the masking algorithm details",
                    filename=False,
                    equate=False),
            _Switch(["-gi_mask", "gi_mask"],
                    "Create GI indexed masking data."),
            _Option(["-gi_mask_name", "gi_mask_name"],
                    "Comma-separated list of masking data output files.",
                    filename=False,
                    equate=False),
            _Option(["-max_file_sz", "max_file_sz"],
                    "Maximum file size for BLAST database files. "
                    "Default = '1GB'",
                    filename=False,
                    equate=False),
            _Option(["-logfile", "logfile"],
                    "File to which the program log should be redirected",
                    filename=True,
                    equate=False),
            # The checker accepts any value which survives a round trip
            # through int() unchanged, e.g. 9606 or "9606". Note that a
            # non-numeric string makes int() itself raise ValueError.
            _Option(["-taxid", "taxid"],
                    "Taxonomy ID to assign to all sequences",
                    filename=False,
                    equate=False,
                    checker_function=lambda x: type(x)(int(x)) == x),
            _Option(["-taxid_map", "taxid_map"],
                    "Text file mapping sequence IDs to taxonomy IDs. "
                    "Format:<SequenceId> <TaxonomyId><newline>",
                    filename=True,
                    equate=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)

    def _input_type_checker(self, x):
        """Return True if x is a recognised input_type value (PRIVATE).

        The accepted values are 'asn1_bin', 'asn1_txt', 'blastdb' and
        'fasta'. Used as the checker function of the -input_type option.
        """
        return x in ('asn1_bin', 'asn1_txt', 'blastdb', 'fasta')

    def _validate(self):
        """Check the option combination is consistent (PRIVATE).

        Raises ValueError if mutually exclusive options are both set
        (mask_id with gi_mask, taxid with taxid_map) or if an option is
        set without the option(s) it depends on. Finally defers to
        AbstractCommandline._validate for the remaining checks.
        """
        incompatibles = {"mask_id": ["gi_mask"],
                         "gi_mask": ["mask_id"],
                         "taxid": ["taxid_map"]}

        # Copied from _NcbibaseblastCommandline class above.
        # Code repeated here for python2 and 3 compatibility,
        # because this is not a _NcbibaseblastCommandline subclass.
        for a in incompatibles:
            if self._get_parameter(a):
                for b in incompatibles[a]:
                    if self._get_parameter(b):
                        raise ValueError("Options %s and %s are incompatible."
                                         % (a, b))

        # Dependency checks: each option below needs its prerequisite(s).
        if self.mask_id and not self.mask_data:
            raise ValueError("Option mask_id requires mask_data to be set.")
        if self.mask_desc and not self.mask_id:
            raise ValueError("Option mask_desc requires mask_id to be set.")
        if self.gi_mask and not self.parse_seqids:
            raise ValueError("Option gi_mask requires parse_seqids to be set.")
        if self.gi_mask_name and not (self.mask_data and self.gi_mask):
            raise ValueError("Option gi_mask_name requires mask_data and "
                             "gi_mask to be set.")
        if self.taxid_map and not self.parse_seqids:
            raise ValueError("Option taxid_map requires parse_seqids "
                             "to be set.")
        AbstractCommandline._validate(self)
def _test():
    """Run the Bio.Blast.Applications module's doctests (PRIVATE)."""
    # doctest is imported here, inside the function, so it is only loaded
    # when the self-test is actually run.
    import doctest

    doctest.testmod(verbose=1)
......
......@@ -172,16 +172,16 @@ def qblast(program, database, sequence, url_base=NCBI_BLAST_URL,
# will take longer thus at least 70s with delay. Therefore,
# start with 20s delay, thereafter once a minute.
delay = 20 # seconds
previous = time.time()
while True:
current = time.time()
wait = previous + delay - current
wait = qblast._previous + delay - current
if wait > 0:
time.sleep(wait)
previous = current + wait
qblast._previous = current + wait
else:
previous = current
if delay < 60:
qblast._previous = current
# delay by at least 60 seconds only if running the request against the public NCBI API
if delay < 60 and url_base == NCBI_BLAST_URL:
# Wasn't a quick return, must wait at least a minute
delay = 60
......@@ -203,10 +203,12 @@ def qblast(program, database, sequence, url_base=NCBI_BLAST_URL,
status = results[i + len("Status="):j].strip()
if status.upper() == "READY":
break
return StringIO(results)
qblast._previous = 0
def _parse_qblast_ref_page(handle):
"""Extract a tuple of RID, RTOE from the 'please wait' page (PRIVATE).
......
......@@ -37,10 +37,10 @@ class _XMLparser(ContentHandler):
self._debug_ignore_list = []
def _secure_name(self, name):
"""Remove 'dangerous' from tag names.
"""Remove 'dangerous' from tag names (PRIVATE).
Arguments:
- name -- name to be 'secured'
- name -- name to be 'secured'.
"""
# Replace '-' with '_' in XML tag names
......@@ -160,10 +160,12 @@ class BlastParser(_XMLparser):
self._parameters.filter = None # Maybe I should update the class?
def _start_Iteration(self):
    """Start a new BLAST iteration (PRIVATE).

    Stores a fresh Record.Blast object as self._blast.
    """
    self._blast = Record.Blast()
def _end_Iteration(self):
"""End interaction (PRIVATE)."""
# We stored a lot of generic "top level" information
# in self._header (an object of type Record.Header)
self._blast.reference = self._header.reference
......@@ -222,14 +224,14 @@ class BlastParser(_XMLparser):
# Header
def _end_BlastOutput_program(self):
"""BLAST program, e.g., blastp, blastn, etc.
"""BLAST program, e.g., blastp, blastn, etc. (PRIVATE).
Save this to put on each blast record object
"""
self._header.application = self._value.upper()
def _end_BlastOutput_version(self):
"""Version number and date of the BLAST engine.
"""Version number and date of the BLAST engine (PRIVATE).
e.g. "BLASTX 2.2.12 [Aug-07-2005]" but there can also be
variants like "BLASTP 2.2.18+" without the date.
......@@ -359,17 +361,18 @@ class BlastParser(_XMLparser):
self._parameters.filter = self._value
# def _end_Parameters_pattern(self):
# """Pattern used for phi-blast search
# """Pattern used for phi-blast search (PRIVATE).
# """
# pass # XXX TODO PSI
# def _end_Parameters_entrez_query(self):
# """Entrez query used to limit search
# """Entrez query used to limit search (PRIVATE).
# """
# pass # XXX TODO PSI
# Hits
def _start_Hit(self):
"""Start filling records (PRIVATE)."""
self._blast.alignments.append(Record.Alignment())
self._blast.descriptions.append(Record.Description())
self._blast.multiple_alignment = []
......@@ -378,6 +381,7 @@ class BlastParser(_XMLparser):
self._descr.num_alignments = 0
def _end_Hit(self):
"""Clear variables (PRIVATE)."""
# Cleanup
self._blast.multiple_alignment = None
self._hit = None
......