Skip to content
Commits on Source (10)
......@@ -7,6 +7,6 @@ I have read the ``CONTRIBUTING.rst`` file and understand that AppVeyor and
TravisCI will be used to confirm the Biopython unit tests and ``flake8`` style
checks pass with these changes.
I am happy be thanked by name in the ``NEWS.rst`` and ``CONTRIB.rst`` files,
and have added myself to those files as part of this pull request. (*This
acknowledgement is optional. Note we list the names sorted alphabetically.*)
I have added my name to the alphabetical contributors listings in the files
``NEWS.rst`` and ``CONTRIB.rst`` as part of this pull request, am listed
already, or do not wish to be listed. (*This acknowledgement is optional.*)
......@@ -65,7 +65,7 @@ deps =
{py27,py34,py35,py35}: mysql-connector-python-rf
{py27,py35,pypy}: rdflib
{pypy,pypy3}: numpy==1.12.1
{py27,py34,py35,py36}: numpy
{py27,py34,py36}: numpy
{py36}: scipy
{py27}: networkx
{py36}: matplotlib
......@@ -94,6 +94,7 @@ deps =
flake8-rst-docstrings
restructuredtext_lint
commands =
flake8 --max-line-length 82 setup.py
# These folders each have their own .flake8 file:
flake8 BioSQL/
flake8 Scripts/
......
......@@ -104,10 +104,10 @@ before_install:
- |
if [[ $TRAVIS_PYTHON_VERSION == 'pypy' ]]; then
deactivate
wget https://bitbucket.org/squeaky/portable-pypy/downloads/pypy-5.10.0-linux_x86_64-portable.tar.bz2
tar -jxvf pypy-5.10.0-linux_x86_64-portable.tar.bz2
wget https://bitbucket.org/squeaky/portable-pypy/downloads/pypy-6.0.0-linux_x86_64-portable.tar.bz2
tar -jxvf pypy-6.0.0-linux_x86_64-portable.tar.bz2
echo 'Setting up aliases...'
cd pypy-5.10.0-linux_x86_64-portable/bin/
cd pypy-6.0.0-linux_x86_64-portable/bin/
export PATH=$PWD:$PATH
ln -s pypy python
echo 'Setting up pip...'
......@@ -116,10 +116,10 @@ before_install:
- |
if [[ $TRAVIS_PYTHON_VERSION == 'pypy3' ]]; then
deactivate
wget https://bitbucket.org/squeaky/portable-pypy/downloads/pypy3.5-5.10.1-linux_x86_64-portable.tar.bz2
tar -jxvf pypy3.5-5.10.1-linux_x86_64-portable.tar.bz2
wget https://bitbucket.org/squeaky/portable-pypy/downloads/pypy3.5-6.0.0-linux_x86_64-portable.tar.bz2
tar -jxvf pypy3.5-6.0.0-linux_x86_64-portable.tar.bz2
echo 'Setting up aliases...'
cd pypy3.5-5.10.1-linux_x86_64-portable/bin/
cd pypy3.5-6.0.0-linux_x86_64-portable/bin/
export PATH=$PWD:$PATH
ln -s pypy3 python
echo 'Setting up pip...'
......
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
# Copyright 2000 Brad Chapman. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Extract information from alignment objects.
In order to try and avoid huge alignment objects with tons of functions,
......@@ -179,7 +180,7 @@ class SummaryInfo(object):
return Seq(consensus, consensus_alpha)
def _guess_consensus_alphabet(self, ambiguous):
"""Pick an (ungapped) alphabet for an alignment consesus sequence.
"""Pick an (ungapped) alphabet for an alignment consesus sequence (PRIVATE).
This just looks at the sequences we have, checks their type, and
returns as appropriate type which seems to make sense with the
......@@ -319,7 +320,7 @@ class SummaryInfo(object):
return start_dict
def _get_all_letters(self):
"""Return a string containing the expected letters in the alignment."""
"""Return a string containing the expected letters in the alignment (PRIVATE)."""
all_letters = self.alignment._alphabet.letters
if all_letters is None or \
(isinstance(self.alignment._alphabet, Alphabet.Gapped) and
......@@ -337,7 +338,7 @@ class SummaryInfo(object):
return all_letters
def _get_base_replacements(self, skip_items=None):
"""Get a zeroed dictionary of all possible letter combinations.
"""Get a zeroed dictionary of all possible letter combinations (PRIVATE).
This looks at the type of alphabet and gets the letters for it.
It then creates a dictionary with all possible combinations of these
......
......@@ -2,12 +2,10 @@
# Copyright 2011 by Andreas Wilm. All rights reserved.
# Based on ClustalW wrapper copyright 2009 by Cymon J. Cox.
#
# Wrapper for Clustal Omega by Andreas Wilm (2011). Used _Clustalw.py
# as template.
#
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Command line wrapper for the multiple alignment program Clustal Omega."""
from __future__ import print_function
......
# Copyright 2009 by Cymon J. Cox. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Command line wrapper for the multiple alignment program Clustal W."""
from __future__ import print_function
......
# Copyright 2009 by Cymon J. Cox. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Command line wrapper for the multiple alignment program DIALIGN2-2."""
from __future__ import print_function
......
# Copyright 2013 by Christian Brueffer. All rights reserved.
#
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Command line wrapper for the multiple sequence alignment program MSAProbs."""
from __future__ import print_function
......
# Copyright 2009 by Cymon J. Cox. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Command line wrapper for the multiple alignment programme MAFFT."""
from __future__ import print_function
......
# Copyright 2009 by Cymon J. Cox. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Command line wrapper for the multiple alignment program MUSCLE."""
from __future__ import print_function
......
# Copyright 2009 by Cymon J. Cox. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Command line wrapper for the multiple alignment program PRANK."""
from __future__ import print_function
......
# Copyright 2009 by Cymon J. Cox. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Command line wrapper for the multiple alignment program PROBCONS."""
from __future__ import print_function
......
# Copyright 2009 by Cymon J. Cox and Brad Chapman. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Command line wrapper for the multiple alignment program TCOFFEE."""
from __future__ import print_function
......
# Copyright 2009 by Peter Cock & Cymon J. Cox. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Alignment command line tool wrappers."""
from ._Muscle import MuscleCommandline
......
# Copyright 2008-2011 by Peter Cock.
# Copyright 2000, 2004 by Brad Chapman.
# Revisions copyright 2010-2013, 2015-2018 by Peter Cock.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Code for dealing with sequence alignments.
One of the most important things in this module is the MultipleSeqAlignment
......@@ -11,10 +14,13 @@ class, used in the Bio.AlignIO module.
"""
from __future__ import print_function
import sys # Only needed to check if we are using Python 2 or 3
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord, _RestrictedDict
from Bio import Alphabet
from Bio.Align import _aligners
class MultipleSeqAlignment(object):
"""Represents a classical multiple sequence alignment (MSA).
......@@ -47,6 +53,7 @@ class MultipleSeqAlignment(object):
7
>>> for record in align:
... print("%s %i" % (record.id, len(record)))
...
gi|6273285|gb|AF191659.1|AF191 156
gi|6273284|gb|AF191658.1|AF191 156
gi|6273287|gb|AF191661.1|AF191 156
......@@ -370,6 +377,7 @@ class MultipleSeqAlignment(object):
>>> for record in align:
... print(record.id)
... print(record.seq)
...
Alpha
ACTGCTAGCTAG
Beta
......@@ -923,6 +931,434 @@ class MultipleSeqAlignment(object):
self._records.sort(key=key, reverse=reverse)
class PairwiseAlignment(object):
"""Represents a pairwise sequence alignment.
Internally, the pairwise alignment is stored as the path through
the traceback matrix, i.e. a tuple of pairs of indices corresponding
to the vertices of the path in the traceback matrix.
"""
def __init__(self, target, query, path, score):
"""Initialize a new PairwiseAlignment object.
Arguments:
- target - The first sequence, as a plain string, without gaps.
- query - The second sequence, as a plain string, without gaps.
- path - The path through the traceback matrix, defining an
alignment.
- score - The alignment score.
You would normally obtain a PairwiseAlignment object by iterating
over a PairwiseAlignments object.
"""
self.target = target
self.query = query
self.score = score
self.path = path
# For Python2 only
def __cmp__(self, other):
if self.path < other.path:
return -1
if self.path > other.path:
return +1
return 0
def __eq__(self, other):
return self.path == other.path
def __ne__(self, other):
return self.path != other.path
def __lt__(self, other):
return self.path < other.path
def __le__(self, other):
return self.path <= other.path
def __gt__(self, other):
return self.path > other.path
def __ge__(self, other):
return self.path >= other.path
def __format__(self, format_spec):
if format_spec == 'psl':
return self._format_psl()
return str(self)
def __str__(self):
query = self.query
target = self.target
try:
# check if query is a SeqRecord
query = query.seq
except AttributeError:
# query is a Seq object or a plain string
pass
try:
# check if target is a SeqRecord
target = target.seq
except AttributeError:
# target is a Seq object or a plain string
pass
seq1 = str(target)
seq2 = str(query)
n1 = len(seq1)
n2 = len(seq2)
aligned_seq1 = ""
aligned_seq2 = ""
pattern = ""
path = self.path
end1, end2 = path[0]
if end1 > 0 or end2 > 0:
end = max(end1, end2)
aligned_seq1 += "." * (end - end1) + seq1[:end1]
aligned_seq2 += "." * (end - end2) + seq2[:end2]
pattern += '.' * end
start1 = end1
start2 = end2
for end1, end2 in path[1:]:
gap = 0
if end1 == start1:
gap = end2 - start2
aligned_seq1 += '-' * gap
aligned_seq2 += seq2[start2:end2]
pattern += '-' * gap
elif end2 == start2:
gap = end1 - start1
aligned_seq1 += seq1[start1:end1]
aligned_seq2 += '-' * gap
pattern += '-' * gap
else:
s1 = seq1[start1:end1]
s2 = seq2[start2:end2]
aligned_seq1 += s1
aligned_seq2 += s2
for c1, c2 in zip(s1, s2):
if c1 == c2:
pattern += '|'
else:
pattern += 'X'
start1 = end1
start2 = end2
n1 -= end1
n2 -= end2
n = max(n1, n2)
aligned_seq1 += seq1[end1:] + '.' * (n - n1)
aligned_seq2 += seq2[end2:] + '.' * (n - n2)
pattern += '.' * n
return "%s\n%s\n%s\n" % (aligned_seq1, pattern, aligned_seq2)
def _format_psl(self):
query = self.query
target = self.target
try:
Qname = query.id
except AttributeError:
Qname = "query"
else:
query = query.seq
try:
Tname = target.id
except AttributeError:
Tname = "target"
else:
target = target.seq
seq1 = str(target)
seq2 = str(query)
n1 = len(seq1)
n2 = len(seq2)
match = 0
mismatch = 0
repmatch = 0
Ns = 0
Qgapcount = 0
Qgapbases = 0
Tgapcount = 0
Tgapbases = 0
Qsize = n2
Qstart = 0
Qend = Qsize
Tsize = n1
Tstart = 0
Tend = Tsize
blockSizes = []
qStarts = []
tStarts = []
strand = '+'
start1 = 0
start2 = 0
start1, start2 = self.path[0]
for end1, end2 in self.path[1:]:
count1 = end1 - start1
count2 = end2 - start2
if count1 == 0:
if start2 == 0:
Qstart += count2
elif end2 == n2:
Qend -= count2
else:
Qgapcount += 1
Qgapbases += count2
start2 = end2
elif count2 == 0:
if start1 == 0:
Tstart += count1
elif end1 == n1:
Tend -= count1
else:
Tgapcount += 1
Tgapbases += count1
start1 = end1
else:
assert count1 == count2
tStarts.append(start1)
qStarts.append(start2)
blockSizes.append(count1)
for c1, c2 in zip(seq1[start1:end1], seq2[start2:end2]):
if c1 == 'N' or c2 == 'N':
Ns += 1
elif c1 == c2:
match += 1
else:
mismatch += 1
start1 = end1
start2 = end2
blockcount = len(blockSizes)
blockSizes = ",".join(map(str, blockSizes)) + ","
qStarts = ",".join(map(str, qStarts)) + ","
tStarts = ",".join(map(str, tStarts)) + ","
words = [str(match),
str(mismatch),
str(repmatch),
str(Ns),
str(Qgapcount),
str(Qgapbases),
str(Tgapcount),
str(Tgapbases),
strand,
Qname,
str(Qsize),
str(Qstart),
str(Qend),
Tname,
str(Tsize),
str(Tstart),
str(Tend),
str(blockcount),
blockSizes,
qStarts,
tStarts,
]
line = "\t".join(words) + "\n"
return line
class PairwiseAlignments(object):
"""Implements an iterator over pairwise alignments returned by the aligner.
This class also supports indexing, which is fast for increasing indices,
but may be slow for random access of a large number of alignments.
Note that pairwise aligners can return an astronomical number of alignments,
even for relatively short sequences, if they align poorly to each other. We
therefore recommend to first check the number of alignments, accessible as
len(alignments), which can be calculated quickly even if the number of
alignments is very large.
"""
def __init__(self, seqA, seqB, score, paths):
"""Initialize a new PairwiseAlignments object.
Arguments:
- seqA - The first sequence, as a plain string, without gaps.
- seqB - The second sequence, as a plain string, without gaps.
- score - The alignment score.
- paths - An iterator over the paths in the traceback matrix;
each path defines one alignment.
You would normally obtain an PairwiseAlignments object by calling
aligner.align(seqA, seqB), where aligner is a PairwiseAligner object.
"""
self.seqA = seqA
self.seqB = seqB
self.score = score
self.paths = paths
self.index = -1
def __len__(self):
return len(self.paths)
def __getitem__(self, index):
if index == self.index:
return self.alignment
if index < self.index:
self.paths.reset()
self.index = -1
while self.index < index:
try:
alignment = next(self)
except StopIteration:
raise IndexError('index out of range')
return alignment
def __iter__(self):
self.paths.reset()
self.index = -1
return self
def __next__(self):
path = next(self.paths)
self.index += 1
alignment = PairwiseAlignment(self.seqA, self.seqB, path, self.score)
self.alignment = alignment
return alignment
if sys.version_info[0] < 3: # Python 2
next = __next__
class PairwiseAligner(_aligners.PairwiseAligner):
"""Performs pairwise sequence alignment using dynamic programming.
This provides functions to get global and local alignments between two
sequences. A global alignment finds the best concordance between all
characters in two sequences. A local alignment finds just the
subsequences that align the best.
To perform a pairwise sequence alignment, first create a PairwiseAligner
object. This object stores the match and mismatch scores, as well as the
gap scores. Typically, match scores are positive, while mismatch scores
and gap scores are negative or zero. By default, the match score is 1,
and the mismatch and gap scores are zero. Based on the values of the gap
scores, a PairwiseAligner object automatically chooses the appropriate
alignment algorithm (the Needleman-Wunsch, Smith-Waterman, Gotoh, or
Waterman-Smith-Beyer global or local alignment algorithm).
Calling the "score" method on the aligner with two sequences as arguments
will calculate the alignment score between the two sequences.
Calling the "align" method on the aligner with two sequences as arguments
will return a generator yielding the alignments between the two
sequences.
Some examples:
>>> from Bio import Align
>>> aligner = Align.PairwiseAligner()
>>> alignments = aligner.align("ACCGT", "ACG")
>>> for alignment in sorted(alignments):
... print("Score = %.1f:" % alignment.score)
... print(alignment)
...
Score = 3.0:
ACCGT
|-||-
A-CG-
<BLANKLINE>
Score = 3.0:
ACCGT
||-|-
AC-G-
<BLANKLINE>
Specify the aligner mode as local to generate local alignments:
>>> aligner.mode = 'local'
>>> alignments = aligner.align("ACCGT", "ACG")
>>> for alignment in sorted(alignments):
... print("Score = %.1f:" % alignment.score)
... print(alignment)
...
Score = 3.0:
ACCGT
|-||.
A-CG.
<BLANKLINE>
Score = 3.0:
ACCGT
||-|.
AC-G.
<BLANKLINE>
Do a global alignment. Identical characters are given 2 points,
1 point is deducted for each non-identical character.
>>> aligner.mode = 'global'
>>> aligner.match = 2
>>> aligner.mismatch = -1
>>> for alignment in aligner.align("ACCGT", "ACG"):
... print("Score = %.1f:" % alignment.score)
... print(alignment)
...
Score = 6.0:
ACCGT
||-|-
AC-G-
<BLANKLINE>
Score = 6.0:
ACCGT
|-||-
A-CG-
<BLANKLINE>
Same as above, except now 0.5 points are deducted when opening a
gap, and 0.1 points are deducted when extending it.
>>> aligner.open_gap_score = -0.5
>>> aligner.extend_gap_score = -0.1
>>> aligner.target_end_gap_score = 0.0
>>> aligner.query_end_gap_score = 0.0
>>> for alignment in aligner.align("ACCGT", "ACG"):
... print("Score = %.1f:" % alignment.score)
... print(alignment)
...
Score = 5.5:
ACCGT
|-||-
A-CG-
<BLANKLINE>
Score = 5.5:
ACCGT
||-|-
AC-G-
<BLANKLINE>
The alignment function can also use known matrices already included in
Biopython:
>>> from Bio.SubsMat import MatrixInfo
>>> aligner = Align.PairwiseAligner()
>>> aligner.substitution_matrix = MatrixInfo.blosum62
>>> alignments = aligner.align("KEVLA", "EVL")
>>> alignments = list(alignments)
>>> print("Number of alignments: %d" % len(alignments))
Number of alignments: 1
>>> alignment = alignments[0]
>>> print("Score = %.1f" % alignment.score)
Score = 13.0
>>> print(alignment)
KEVLA
-|||-
-EVL-
<BLANKLINE>
"""
def align(self, seqA, seqB):
seqA = str(seqA)
seqB = str(seqB)
score, paths = _aligners.PairwiseAligner.align(self, seqA, seqB)
alignments = PairwiseAlignments(seqA, seqB, score, paths)
return alignments
def score(self, seqA, seqB):
seqA = str(seqA)
seqB = str(seqB)
return _aligners.PairwiseAligner.score(self, seqA, seqB)
if __name__ == "__main__":
from Bio._utils import run_doctest
run_doctest()
This diff is collapsed.
......@@ -217,7 +217,8 @@ class ClustalIterator(AlignmentIterator):
break
for i in range(len(ids)):
assert line[0] != " ", "Unexpected line:\n%s" % repr(line)
if line[0] == " ":
raise ValueError("Unexpected line:\n%s" % repr(line))
fields = line.rstrip().split()
# We expect there to be two fields, there can be an optional
......@@ -233,8 +234,8 @@ class ClustalIterator(AlignmentIterator):
if fields[1] != line[seq_cols]:
start = len(fields[0]) + \
line[len(fields[0]):].find(fields[1])
assert start == seq_cols.start, \
'Old location %s -> %i:XX' % (seq_cols, start)
if start != seq_cols.start:
raise ValueError('Old location %s -> %i:XX' % (seq_cols, start))
end = start + len(fields[1])
seq_cols = slice(start, end)
del start, end
......@@ -287,9 +288,9 @@ class ClustalIterator(AlignmentIterator):
alignment._version = version
if consensus:
alignment_length = len(seqs[0])
assert len(consensus) == alignment_length, \
"Alignment length is %i, consensus length is %i, '%s'" \
% (alignment_length, len(consensus), consensus)
if len(consensus) != alignment_length:
raise ValueError("Alignment length is %i, consensus length is %i, '%s'"
% (alignment_length, len(consensus), consensus))
alignment.column_annotations["clustal_consensus"] = consensus
# For backward compatibility prior to .column_annotations:
alignment._star_info = consensus
......
......@@ -152,7 +152,7 @@ class EmbossIterator(AlignmentIterator):
# (an aligned seq is broken up into multiple lines)
id, start = id_start
seq, end = seq_end
if start == end:
if start >= end:
# Special case, either a single letter is present,
# or no letters at all.
if seq.replace("-", "") == "":
......@@ -166,10 +166,10 @@ class EmbossIterator(AlignmentIterator):
start = int(start) - 1 # python counting
end = int(end)
if index < 0 or index >= number_of_seqs:
raise ValueError("Expected index %i in range [0,%i)"
% (index, number_of_seqs))
# The identifier is truncated...
assert 0 <= index and index < number_of_seqs, \
"Expected index %i in range [0,%i)" \
% (index, number_of_seqs)
assert id == ids[index] or id == ids[index][:len(id)]
if len(seq_starts) == index:
......@@ -177,21 +177,20 @@ class EmbossIterator(AlignmentIterator):
seq_starts.append(start)
# Check the start...
if start == end:
if start >= end:
assert seq.replace("-", "") == "", line
else:
assert start - seq_starts[index] == len(seqs[index].replace("-", "")), \
"Found %i chars so far for sequence %i (%s, %s), line says start %i:\n%s" \
elif start - seq_starts[index] != len(seqs[index].replace("-", "")):
raise ValueError("Found %i chars so far for sequence %i (%s, %s), line says start %i:\n%s"
% (len(seqs[index].replace("-", "")), index, id, repr(seqs[index]),
start, line)
start, line))
seqs[index] += seq
# Check the end ...
assert end == seq_starts[index] + len(seqs[index].replace("-", "")), \
"Found %i chars so far for sequence %i (%s, %s, start=%i), file says end %i:\n%s" \
if end != seq_starts[index] + len(seqs[index].replace("-", "")):
raise ValueError(
"Found %i chars so far for sequence %i (%s, %s, start=%i), file says end %i:\n%s"
% (len(seqs[index].replace("-", "")), index, id, repr(seqs[index]),
seq_starts[index], end, line)
seq_starts[index], end, line))
index += 1
if index >= number_of_seqs:
......
# Copyright 2008-2016 by Peter Cock. All rights reserved.
#
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Bio.AlignIO support for "fasta-m10" output from Bill Pearson's FASTA tools.
You are expected to use this module via the Bio.AlignIO functions (or the
......@@ -55,9 +56,9 @@ def _extract_alignment_region(alignment_seq_with_flanking, annotation):
end = display_start \
- int(annotation['al_stop']) + 1
end += align_stripped.count("-")
assert 0 <= start and start < end and end <= len(align_stripped), \
"Problem with sequence start/stop,\n%s[%i:%i]\n%s" \
% (alignment_seq_with_flanking, start, end, annotation)
if start < 0 or start >= end or end > len(align_stripped):
raise ValueError("Problem with sequence start/stop,\n%s[%i:%i]\n%s"
% (alignment_seq_with_flanking, start, end, annotation))
return align_stripped[start:end]
......@@ -113,7 +114,7 @@ def FastaM10Iterator(handle, alphabet=single_letter_alphabet):
q = "?" # Just for printing len(q) in debug below
m = "?" # Just for printing len(m) in debug below
tool = global_tags.get("tool", "").upper()
try:
q = _extract_alignment_region(query_seq, query_tags)
if tool in ["TFASTX"] and len(match_seq) == len(q):
m = match_seq
......@@ -121,18 +122,18 @@ def FastaM10Iterator(handle, alphabet=single_letter_alphabet):
# and the apparent mix of aa and bp coordinates works.
else:
m = _extract_alignment_region(match_seq, match_tags)
assert len(q) == len(m)
except AssertionError as err:
print("Darn... amino acids vs nucleotide coordinates?")
print(tool)
print(query_seq)
print(query_tags)
print("%s %i" % (q, len(q)))
print(match_seq)
print(match_tags)
print("%s %i" % (m, len(m)))
print(handle.name)
raise err
if len(q) != len(m):
message = """Darn... amino acids vs nucleotide coordinates?
tool: {0}
query_seq: {1}
query_tags: {2}
{3} length: {4}
match_seq: {5}
match_tags: {6}
{7} length: {8}
handle.name: {9}
""".format(tool, query_seq, query_tags, q, len(q), match_seq, match_tags, m, len(m), handle.name)
raise ValueError(message)
assert alphabet is not None
alignment = MultipleSeqAlignment([], alphabet)
......@@ -311,7 +312,7 @@ def FastaM10Iterator(handle, alphabet=single_letter_alphabet):
# Can get > as the last line of a histogram
pass
else:
assert False, "state %i got %r" % (state, line)
raise RuntimeError("state %i got %r" % (state, line))
elif line.startswith("; al_cons"):
assert state == state_ALIGN_MATCH, line
state = state_ALIGN_CONS
......@@ -337,7 +338,7 @@ def FastaM10Iterator(handle, alphabet=single_letter_alphabet):
elif state == state_ALIGN_MATCH:
match_tags[key] = value
else:
assert False, "Unexpected state %r, %r" % (state, line)
raise RuntimeError("Unexpected state %r, %r" % (state, line))
elif state == state_ALIGN_QUERY:
query_seq += line.strip()
elif state == state_ALIGN_MATCH:
......
# Copyright 2008-2013 by Peter Cock. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
# Copyright 2008-2018 by Peter Cock. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""AlignIO support module (not for general use).
Unless you are writing a new parser or writer for Bio.AlignIO, you should not
......