Commits on Source (6)
......@@ -40,20 +40,19 @@ ignore =
# =========================================
# Optional ignores for local installations:
# =========================================
BLK100, # Black would make changes, only on local installations (so far)
PIE781, # Assigning to temp variable and then returning, not enforcing
# ========================
# Folder specific ignores:
# ========================
per-file-ignores =
Bio/*:E122,E126,F401,F841,D105,B009,B010,B011,C812,C815
Tests/*:F401,F841,D101,D102,D103,B009,B010,B011,C812
Bio/*:E122,E126,F401,F841,D105,B009,B010,B011,C812,C815,BLK100
Tests/*:F401,F841,D101,D102,D103,B009,B010,B011,C812,BLK100
# Due to a bug in flake8, we need the following lines for running the
# pre-commit hook. If you made edits above, please also change them here!
/Bio/*:E122,E126,F401,F841,D105,B009,B010,B011,C812,C815
/Tests/*:F401,F841,D101,D102,D103,B009,B010,B011,C812
/Bio/*:E122,E126,F401,F841,D105,B009,B010,B011,C812,C815,BLK100
/Tests/*:F401,F841,D101,D102,D103,B009,B010,B011,C812,BLK100
# =============================
# per-file-ignores error codes:
......@@ -71,6 +70,7 @@ per-file-ignores =
# instead callers should raise AssertionError().
# C812 missing trailing comma
# C815 missing trailing comma in Python 3.5+
# BLK100 Black would make changes
#Tests/*:
# F401 module imported but unused TODO? (88 occurrences)
# F841 local variable is assigned to but never used TODO? (64 occurrences)
# D101 missing docstring in public class (207 occurrences)
......@@ -83,7 +83,7 @@ per-file-ignores =
# B011 do not call assert False since python -O removes these calls;
# instead callers should raise AssertionError()
# C812 missing trailing comma
# BLK100 Black would make changes
# =======================
# flake8-quotes settings:
# =======================
......
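Two of the error codes above deserve a concrete illustration. B011 exists because running Python with -O strips assert statements entirely, so an explicit raise is the safe spelling. BLK100 comes from the flake8-black plugin (added to the lint dependencies further down) and fires whenever Black would reformat a file. Minimal sketches follow; the function name and snippet are invented, and Black's Python API has changed across releases (black.Mode was black.FileMode in older versions), so treat that call as an assumption:

def require(value):
    # Under "python -O" this check would vanish entirely (B011):
    #     assert value is not None, "value is required"
    # An explicit raise always executes:
    if value is None:
        raise AssertionError("value is required")
    return value

import black

src = 'x = { "a": 1 }\n'
formatted = black.format_str(src, mode=black.Mode())
if formatted != src:
    # This is the condition flake8-black reports as BLK100.
    print("BLK100: Black would make changes")
    print(formatted, end="")  # x = {"a": 1}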
......@@ -25,7 +25,6 @@ dist
#Ignore all compiled python files (e.g. from running the unit tests):
*.pyc
*.pyo
*.py{}
*.py-e
#Ignore all Jython class files (present if using Jython)
......@@ -60,6 +59,8 @@ Tests/biosql.ini
Tests/BioSQL/temp_sqlite.db
Tests/BioSQL/temp_sqlite.db-journal
Tests/Cluster/cyano_result*
# Created by Tests/test_BWA_tool.py:
Tests/out.bam
#TODO - The Tutorial doctests should leave example files after
#running Tests/test_Tutorial.py
......
......@@ -55,6 +55,7 @@ whitelist_externals =
# (But must compile numpy for PyPy right now)
install_command = pip install --only-binary=scipy {opts} {packages}
deps =
numpy
#Lines starting xxx: are filtered by the environment.
#Leaving py36 without any dependencies (even numpy)
cover: coverage
......@@ -69,9 +70,7 @@ deps =
py27,py35: mysql-connector-python-rf
py35,py37: mysqlclient
py27,py35,pypy: rdflib
pypy,pypy3: numpy==1.12.1
pypy,pypy3: mysqlclient
py27,py35,py37: numpy
py37: scipy
py27: networkx
py37: matplotlib
......@@ -101,6 +100,8 @@ deps =
flake8-rst-docstrings
flake8-comprehensions
flake8-bugbear;python_version>="3.5"
flake8-implicit-str-concat;python_version>="3.5"
flake8-black;python_version>="3.6"
flake8-quotes
restructuredtext_lint
doc8
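The ;python_version>=... suffixes on the new lint dependencies are PEP 508 environment markers: pip silently skips a requirement whose marker is false for the running interpreter, which is how flake8-black stays off Python 3.5 here. A quick way to evaluate a marker, assuming the packaging library is installed:

from packaging.markers import Marker

# The same marker syntax as the deps list above; evaluate() checks it
# against the currently running interpreter.
marker = Marker('python_version >= "3.6"')
print(marker.evaluate())  # True on Python 3.6+, False on older interpreters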
......@@ -140,10 +141,9 @@ commands =
python setup.py sdist --formats=gztar,zip
[testenv:bdist_wheel]
# This should use NumPy while compiling our C code...
# This should not require NumPy while compiling our C code...
skip_install = True
deps =
numpy
commands =
python setup.py bdist_wheel
......
......@@ -62,31 +62,81 @@ matrix:
- stage: test
python: 2.7
env: TOXENV=py27-cover
addons:
apt:
packages:
- &amd64_only_packages [ bwa, ]
- *default_packages
- stage: test
python: 3.5
env: TOXENV=py35-cover
addons:
apt:
packages:
- *amd64_only_packages
- *default_packages
- stage: test
python: 3.6
env: TOXENV=py36-cover
addons:
apt:
packages:
- *amd64_only_packages
- *default_packages
- stage: test
python: 3.7
env: TOXENV=py37-cover
addons:
apt:
packages:
- *amd64_only_packages
- *default_packages
- stage: test
# TODO: Change this once a stable Python 3.8 is on TravisCI:
python: 3.8-dev
python: 3.8
env: TOXENV=py38-cover
addons:
apt:
packages:
- *amd64_only_packages
- *default_packages
- stage: test
python: 3.8
env: TOXENV=py38-nocov
arch: arm64
services:
addons:
apt:
packages:
- stage: test
python: 3.8
env: TOXENV=py38-nocov
arch: ppc64le
- stage: test
python: 3.8
env: TOXENV=py38-cover
arch: s390x
- stage: test
python: pypy
env: TOXENV=pypy-nocov
addons:
apt:
packages:
- *amd64_only_packages
- *default_packages
- stage: test
python: pypy3
env: TOXENV=pypy3-nocov
sudo: false
addons:
apt:
packages:
- bwa
- *amd64_only_packages
- *default_packages
allow_failures:
- arch: arm64
addons:
apt:
packages: &default_packages
- clustalo
- clustalw
- emboss
......@@ -102,8 +152,6 @@ addons:
# We set up $HOME/bin and add it to the $PATH for extra binaries we're using.
#
# There is a phyml Ubuntu package, but currently too old.
#
# There is no GenePop Ubuntu package, although it is in BioConda.
#
# We also need DSSP for testing but it is not available in the repositories.
......@@ -114,26 +162,28 @@ before_install:
- pushd $HOME
- mkdir -p bin
- export PATH=$HOME/bin:$PATH
- echo "Installing PhyML"
- curl -L -O http://www.atgc-montpellier.fr/download/binaries/phyml/PhyML-3.1.zip
- unzip PhyML-3.1.zip
- mv PhyML-3.1/PhyML-3.1_linux64 bin/phyml
#- echo "Installing dssp"
#- curl -L -O ftp://ftp.cmbi.ru.nl/pub/software/dssp/dssp-2.0.4-linux-amd64
#- mv dssp-2.0.4-linux-amd64 bin/dssp
#- chmod a+x bin/dssp
- echo "Installing Genepop"
- curl -L -O https://anaconda.org/bioconda/genepop/4.5.1/download/linux-64/genepop-4.5.1-0.tar.bz2
- |
if [ $TRAVIS_CPU_ARCH == amd64 ]; then
echo "Installing Genepop"
curl -L -O https://anaconda.org/bioconda/genepop/4.5.1/download/linux-64/genepop-4.5.1-0.tar.bz2
# This will create ./bin/Genepop and a harmless ./info/ folder.
- tar -jxvf genepop-4.5.1-0.tar.bz2
tar -jxvf genepop-4.5.1-0.tar.bz2
fi
# Setup environment for t-coffee
- mkdir -p $HOME/tcoffee_temp
- export HOME_4_TCOFFEE=$HOME/tcoffee_temp
- popd
- cp Tests/biosql.ini.sample Tests/biosql.ini
- psql -c "create database biosql_test;" -U postgres
- psql -c "create user biosql_user with encrypted password 'biosql_pass';" -U postgres
- psql -c "grant all privileges on database biosql_test to biosql_user;" -U postgres
- |
if [ $TRAVIS_CPU_ARCH == amd64 ]; then
psql -c "create database biosql_test;" -U postgres
psql -c "create user biosql_user with encrypted password 'biosql_pass';" -U postgres
psql -c "grant all privileges on database biosql_test to biosql_user;" -U postgres
fi
# This is minimal and used under all stages
......
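For readers unfamiliar with the &amd64_only_packages and *default_packages notation in the Travis config above: a YAML anchor (&name) labels a node and an alias (*name) reuses it, so the apt package lists are written once and shared across jobs. A minimal sketch with PyYAML (assuming the pyyaml package; names shortened):

import yaml

snippet = """
anchors:
  - &amd64_only [bwa]
  - &defaults [clustalo, clustalw, emboss]
job_packages:
  - *amd64_only
  - *defaults
"""
data = yaml.safe_load(snippet)
print(data["job_packages"])  # [['bwa'], ['clustalo', 'clustalw', 'emboss']]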
......@@ -48,7 +48,7 @@ class DialignCommandline(AbstractCommandline):
self.parameters = \
[
_Switch(["-afc", "afc"],
"Creates additional output file '*.afc' "
r"Creates additional output file '\*.afc' "
"containing data of all fragments considered "
"for alignment WARNING: this file can be HUGE !"),
_Switch(["-afc_v", "afc_v"],
......@@ -70,7 +70,7 @@ class DialignCommandline(AbstractCommandline):
_Switch(["-fa", "fa"],
"Additional output file in FASTA format."),
_Switch(["-ff", "ff"],
"Creates file *.frg containing information about all "
r"Creates file \*.frg containing information about all "
"fragments that are part of the respective optimal "
"pairwise alignmnets plus information about "
"consistency in the multiple alignment"),
......@@ -78,10 +78,10 @@ class DialignCommandline(AbstractCommandline):
"Output files are named <out_file>.<extension>.",
equate=False),
_Switch(["-fop", "fop"],
"Creates file *.fop containing coordinates of all "
r"Creates file \*.fop containing coordinates of all "
"fragments that are part of the respective pairwise alignments."),
_Switch(["-fsm", "fsm"],
"Creates file *.fsm containing coordinates of all "
r"Creates file \*.fsm containing coordinates of all "
"fragments that are part of the final alignment"),
_Switch(["-iw", "iw"],
"Overlap weights switched off (by default, overlap "
......@@ -104,7 +104,7 @@ class DialignCommandline(AbstractCommandline):
checker_function=lambda x: isinstance(x, int),
equate=False),
_Switch(["-lo", "lo"],
"(Long Output) Additional file *.log with information "
r"(Long Output) Additional file \*.log with information "
"about fragments selected for pairwise alignment and "
"about consistency in multi-alignment procedure."),
_Switch(["-ma", "ma"],
......@@ -112,10 +112,10 @@ class DialignCommandline(AbstractCommandline):
"N-fragments if nucleic acid sequences are aligned."),
_Switch(["-mask", "mask"],
"Residues not belonging to selected fragments are "
"replaced by '*' characters in output alignment "
r"replaced by '\*' characters in output alignment "
"(rather than being printed in lower-case characters)"),
_Switch(["-mat", "mat"],
"Creates file *mat with substitution counts derived "
r"Creates file \*mat with substitution counts derived "
"from the fragments that have been selected for alignment."),
_Switch(["-mat_thr", "mat_thr"],
"Like '-mat' but only fragments with weight score "
......@@ -150,7 +150,7 @@ class DialignCommandline(AbstractCommandline):
"are used only if up to 35 sequences are aligned since "
"calculating overlap weights is time consuming)."),
_Switch(["-pst", "pst"],
"'print status'. Creates and updates a file *.sta with "
r"'print status'. Creates and updates a file \*.sta with "
"information about the current status of the program "
"run. This option is recommended if large data sets "
"are aligned since it allows the user to estimate the "
......@@ -161,7 +161,7 @@ class DialignCommandline(AbstractCommandline):
"alignment or alignment of translated DNA fragments "
"at the expense of sensitivity."),
_Option(["-stars", "stars"],
"Maximum number of '*' characters indicating degree "
r"Maximum number of '\*' characters indicating degree "
"of local similarity among sequences. By default, no "
"stars are used but numbers between 0 and 9, instead.",
checker_function=lambda x: x in range(0, 10),
......
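The Dialign docstring hunks above exist because flake8-rst-docstrings parses docstrings as reStructuredText, where a lone * opens inline emphasis; escaping it as \* requires the backslash to survive, hence the raw string prefix. In isolation:

plain = "Creates file *.frg"   # RST would treat *.frg as the start of emphasis
raw = r"Creates file \*.frg"   # the backslash escapes the * for RST
print(raw)  # Creates file \*.frg
# Without the r prefix, "\*" happens to keep its backslash too, but it is an
# unrecognized escape sequence (a DeprecationWarning on newer Pythons), so
# the raw string is the idiomatic spelling.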
......@@ -79,7 +79,9 @@ class ClustalWriter(SequentialAlignmentWriter):
# now we need to print out the star info, if we've got it
if star_info:
output += (" " * 36) + star_info[cur_char:(cur_char + show_num)] + "\n"
output += (
(" " * 36) + star_info[cur_char : (cur_char + show_num)] + "\n"
)
output += "\n"
cur_char += show_num
......@@ -111,9 +113,10 @@ class ClustalIterator(AlignmentIterator):
# Whitelisted headers we know about
known_headers = ["CLUSTAL", "PROBCONS", "MUSCLE", "MSAPROBS", "Kalign"]
if line.strip().split()[0] not in known_headers:
raise ValueError("%s is not a known CLUSTAL header: %s" %
(line.strip().split()[0],
", ".join(known_headers)))
raise ValueError(
"%s is not a known CLUSTAL header: %s"
% (line.strip().split()[0], ", ".join(known_headers))
)
# find the clustal version in the header line
version = None
......@@ -153,8 +156,7 @@ class ClustalIterator(AlignmentIterator):
# Record the sequence position to get the consensus
if seq_cols is None:
start = len(fields[0]) + \
line[len(fields[0]):].find(fields[1])
start = len(fields[0]) + line[len(fields[0]) :].find(fields[1])
end = start + len(fields[1])
seq_cols = slice(start, end)
del start, end
......@@ -165,11 +167,13 @@ class ClustalIterator(AlignmentIterator):
try:
letters = int(fields[2])
except ValueError:
raise ValueError("Could not parse line, "
"bad sequence number:\n%s" % line)
raise ValueError(
"Could not parse line, bad sequence number:\n%s" % line
)
if len(fields[1].replace("-", "")) != letters:
raise ValueError("Could not parse line, "
"invalid sequence number:\n%s" % line)
raise ValueError(
"Could not parse line, invalid sequence number:\n%s" % line
)
elif line[0] == " ":
# Sequence consensus line...
assert len(ids) == len(seqs)
......@@ -228,13 +232,13 @@ class ClustalIterator(AlignmentIterator):
raise ValueError("Could not parse line:\n%s" % repr(line))
if fields[0] != ids[i]:
raise ValueError("Identifiers out of order? "
"Got '%s' but expected '%s'"
% (fields[0], ids[i]))
raise ValueError(
"Identifiers out of order? Got '%s' but expected '%s'"
% (fields[0], ids[i])
)
if fields[1] != line[seq_cols]:
start = len(fields[0]) + \
line[len(fields[0]):].find(fields[1])
start = len(fields[0]) + line[len(fields[0]) :].find(fields[1])
if start != seq_cols.start:
raise ValueError("Old location %s -> %i:XX" % (seq_cols, start))
end = start + len(fields[1])
......@@ -250,12 +254,13 @@ class ClustalIterator(AlignmentIterator):
try:
letters = int(fields[2])
except ValueError:
raise ValueError("Could not parse line, "
"bad sequence number:\n%s" %
line)
raise ValueError(
"Could not parse line, bad sequence number:\n%s" % line
)
if len(seqs[i].replace("-", "")) != letters:
raise ValueError("Could not parse line, "
"invalid sequence number:\n%s" % line)
raise ValueError(
"Could not parse line, invalid sequence number:\n%s" % line
)
# Read in the next line
line = handle.readline()
......@@ -274,14 +279,19 @@ class ClustalIterator(AlignmentIterator):
if len(seqs) == 0 or len(seqs[0]) == 0:
raise StopIteration
if self.records_per_alignment is not None and \
self.records_per_alignment != len(ids):
raise ValueError("Found %i records in this alignment, "
"told to expect %i"
% (len(ids), self.records_per_alignment))
records = (SeqRecord(Seq(s, self.alphabet), id=i, description=i)
for (i, s) in zip(ids, seqs))
if (
self.records_per_alignment is not None
and self.records_per_alignment != len(ids)
):
raise ValueError(
"Found %i records in this alignment, told to expect %i"
% (len(ids), self.records_per_alignment)
)
records = (
SeqRecord(Seq(s, self.alphabet), id=i, description=i)
for (i, s) in zip(ids, seqs)
)
alignment = MultipleSeqAlignment(records, self.alphabet)
# TODO - Handle alignment annotation better, for now
# mimic the old parser in Bio.Clustalw
......@@ -290,8 +300,10 @@ class ClustalIterator(AlignmentIterator):
if consensus:
alignment_length = len(seqs[0])
if len(consensus) != alignment_length:
raise ValueError("Alignment length is %i, consensus length is %i, '%s'"
% (alignment_length, len(consensus), consensus))
raise ValueError(
"Alignment length is %i, consensus length is %i, '%s'"
% (alignment_length, len(consensus), consensus)
)
alignment.column_annotations["clustal_consensus"] = consensus
# For backward compatibility prior to .column_annotations:
alignment._star_info = consensus
......
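One detail worth noting in the reformatted iterator above: the parser computes the sequence columns once from the first sequence line, stores them as a slice object (seq_cols = slice(start, end)), and indexes every later line with it. A toy illustration with a made-up alignment line:

line = "seq_A      ACDEFGHIKL 10"
seq_cols = slice(11, 21)   # as computed from the first sequence line
print(line[seq_cols])      # ACDEFGHIKL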
......@@ -59,7 +59,9 @@ class EmbossWriter(SequentialAlignmentWriter):
handle.write("#\n")
handle.write("#=======================================\n")
handle.write("\n")
raise NotImplementedError("The subclass should implement the write_alignment method.")
raise NotImplementedError(
"The subclass should implement the write_alignment method."
)
class EmbossIterator(AlignmentIterator):
......@@ -134,10 +136,14 @@ class EmbossIterator(AlignmentIterator):
if length_of_seqs is None:
raise ValueError("Length of sequences missing!")
if self.records_per_alignment is not None \
and self.records_per_alignment != number_of_seqs:
raise ValueError("Found %i records in this alignment, told to expect %i"
% (number_of_seqs, self.records_per_alignment))
if (
self.records_per_alignment is not None
and self.records_per_alignment != number_of_seqs
):
raise ValueError(
"Found %i records in this alignment, told to expect %i"
% (number_of_seqs, self.records_per_alignment)
)
seqs = ["" for id in ids]
seq_starts = []
......@@ -168,8 +174,10 @@ class EmbossIterator(AlignmentIterator):
end = int(end)
if index < 0 or index >= number_of_seqs:
raise ValueError("Expected index %i in range [0,%i)"
% (index, number_of_seqs))
raise ValueError(
"Expected index %i in range [0,%i)"
% (index, number_of_seqs)
)
# The identifier is truncated...
assert id == ids[index] or id == ids[index][: len(id)]
......@@ -181,17 +189,33 @@ class EmbossIterator(AlignmentIterator):
if start >= end:
assert seq.replace("-", "") == "", line
elif start - seq_starts[index] != len(seqs[index].replace("-", "")):
raise ValueError("Found %i chars so far for sequence %i (%s, %s), line says start %i:\n%s"
% (len(seqs[index].replace("-", "")), index, id, repr(seqs[index]),
start, line))
raise ValueError(
"Found %i chars so far for sequence %i (%s, %s), line says start %i:\n%s"
% (
len(seqs[index].replace("-", "")),
index,
id,
repr(seqs[index]),
start,
line,
)
)
seqs[index] += seq
# Check the end ...
if end != seq_starts[index] + len(seqs[index].replace("-", "")):
raise ValueError(
"Found %i chars so far for sequence %i (%s, %s, start=%i), file says end %i:\n%s"
% (len(seqs[index].replace("-", "")), index, id, repr(seqs[index]),
seq_starts[index], end, line))
% (
len(seqs[index].replace("-", "")),
index,
id,
repr(seqs[index]),
seq_starts[index],
end,
line,
)
)
index += 1
if index >= number_of_seqs:
......@@ -207,18 +231,24 @@ class EmbossIterator(AlignmentIterator):
raise ValueError("Unrecognised EMBOSS pairwise line: %r\n" % line)
line = handle.readline()
if line.rstrip() == "#---------------------------------------" \
or line.rstrip() == "#=======================================":
if (
line.rstrip() == "#---------------------------------------"
or line.rstrip() == "#======================================="
):
# End of alignment
self._header = line
break
assert index == 0
if self.records_per_alignment is not None \
and self.records_per_alignment != len(ids):
raise ValueError("Found %i records in this alignment, told to expect %i"
% (len(ids), self.records_per_alignment))
if (
self.records_per_alignment is not None
and self.records_per_alignment != len(ids)
):
raise ValueError(
"Found %i records in this alignment, told to expect %i"
% (len(ids), self.records_per_alignment)
)
records = []
for id, seq in zip(ids, seqs):
......@@ -227,9 +257,10 @@ class EmbossIterator(AlignmentIterator):
# for leading gaps, and thus fails to parse. This old version
# is still used as of Dec 2008 behind the EBI SOAP webservice:
# http://www.ebi.ac.uk/Tools/webservices/wsdl/WSEmboss.wsdl
raise ValueError("Error parsing alignment - sequences of "
raise ValueError(
"Error parsing alignment - sequences of "
"different length? You could be using an "
"old version of EMBOSS.")
records.append(SeqRecord(Seq(seq, self.alphabet),
id=id, description=id))
"old version of EMBOSS."
)
records.append(SeqRecord(Seq(seq, self.alphabet), id=id, description=id))
return MultipleSeqAlignment(records, self.alphabet, annotations=header_dict)
......@@ -56,8 +56,10 @@ def _extract_alignment_region(alignment_seq_with_flanking, annotation):
end += align_stripped.count("-")
if start < 0 or start >= end or end > len(align_stripped):
raise ValueError("Problem with sequence start/stop,\n%s[%i:%i]\n%s"
% (alignment_seq_with_flanking, start, end, annotation))
raise ValueError(
"Problem with sequence start/stop,\n%s[%i:%i]\n%s"
% (alignment_seq_with_flanking, start, end, annotation)
)
return align_stripped[start:end]
......@@ -105,8 +107,7 @@ def FastaM10Iterator(handle, alphabet=single_letter_alphabet):
def build_hsp():
if not query_tags and not match_tags:
raise ValueError("No data for query %r, match %r"
% (query_id, match_id))
raise ValueError("No data for query %r, match %r" % (query_id, match_id))
assert query_tags, query_tags
assert match_tags, match_tags
evalue = align_tags.get("fa_expect")
......@@ -131,7 +132,18 @@ def FastaM10Iterator(handle, alphabet=single_letter_alphabet):
match_tags: {6}
{7} length: {8}
handle.name: {9}
""".format(tool, query_seq, query_tags, q, len(q), match_seq, match_tags, m, len(m), handle.name)
""".format(
tool,
query_seq,
query_tags,
q,
len(q),
match_seq,
match_tags,
m,
len(m),
handle.name,
)
raise ValueError(message)
assert alphabet is not None
......@@ -150,11 +162,13 @@ def FastaM10Iterator(handle, alphabet=single_letter_alphabet):
# Query
# =====
record = SeqRecord(Seq(q, alphabet),
record = SeqRecord(
Seq(q, alphabet),
id=query_id,
name="query",
description=query_descr,
annotations={"original_length": int(query_tags["sq_len"])})
annotations={"original_length": int(query_tags["sq_len"])},
)
# TODO - handle start/end coordinates properly. Short term hack for now:
record._al_start = int(query_tags["al_start"])
record._al_stop = int(query_tags["al_stop"])
......@@ -174,11 +188,13 @@ def FastaM10Iterator(handle, alphabet=single_letter_alphabet):
# Match
# =====
record = SeqRecord(Seq(m, alphabet),
record = SeqRecord(
Seq(m, alphabet),
id=match_id,
name="match",
description=match_descr,
annotations={"original_length": int(match_tags["sq_len"])})
annotations={"original_length": int(match_tags["sq_len"])},
)
# TODO - handle start/end coordinates properly. Short term hack for now:
record._al_start = int(match_tags["al_start"])
record._al_stop = int(match_tags["al_stop"])
......@@ -322,10 +338,12 @@ def FastaM10Iterator(handle, alphabet=single_letter_alphabet):
else:
import warnings
from Bio import BiopythonParserWarning
# Seen in lalign36, specifically version 36.3.4 Apr, 2011
# Fixed in version 36.3.5b Oct, 2011(preload8)
warnings.warn("Missing colon in line: %r" % line,
BiopythonParserWarning)
warnings.warn(
"Missing colon in line: %r" % line, BiopythonParserWarning
)
try:
key, value = [s.strip() for s in line[2:].split(" ", 1)]
except ValueError:
......
......@@ -26,8 +26,7 @@ class AlignmentIterator(object):
"""
# TODO - Should the default be Gapped(single_letter_alphabet) instead?
def __init__(self, handle, seq_count=None,
alphabet=single_letter_alphabet):
def __init__(self, handle, seq_count=None, alphabet=single_letter_alphabet):
"""Create an AlignmentIterator object.
Arguments:
......@@ -66,6 +65,7 @@ class AlignmentIterator(object):
#####################################################
if sys.version_info[0] < 3:
def next(self):
"""Python 2 style alias for Python 3 style __next__ method."""
return self.__next__()
......
......@@ -87,8 +87,12 @@ from .Interfaces import AlignmentIterator
from .Interfaces import SequentialAlignmentWriter
XMFA_HEADER_REGEX = re.compile(r"> (?P<id>\d+):(?P<start>\d+)-(?P<end>\d+) (?P<strand>[+-]) (?P<name>.*)")
XMFA_HEADER_REGEX_BIOPYTHON = re.compile(r"> (?P<id>\d+):(?P<start>\d+)-(?P<end>\d+) (?P<strand>[+-]) (?P<name>[^#]*) # (?P<realname>.*)")
XMFA_HEADER_REGEX = re.compile(
r"> (?P<id>\d+):(?P<start>\d+)-(?P<end>\d+) (?P<strand>[+-]) (?P<name>.*)"
)
XMFA_HEADER_REGEX_BIOPYTHON = re.compile(
r"> (?P<id>\d+):(?P<start>\d+)-(?P<end>\d+) (?P<strand>[+-]) (?P<name>[^#]*) # (?P<realname>.*)"
)
ID_LINE_FMT = "> {seq_name}:{start}-{end} {strand} {file} # {ugly_hack}\n"
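A quick check of what the reformatted XMFA header regex captures; the sample line here is invented:

import re

XMFA_HEADER_REGEX = re.compile(
    r"> (?P<id>\d+):(?P<start>\d+)-(?P<end>\d+) (?P<strand>[+-]) (?P<name>.*)"
)
m = XMFA_HEADER_REGEX.match("> 1:5386-9185 + record.fa")
print(m.group("id"), m.group("start"), m.group("end"),
      m.group("strand"), m.group("name"))
# 1 5386 9185 + record.fa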
......@@ -156,28 +160,41 @@ class MauveWriter(SequentialAlignmentWriter):
# We remove the "/{start}-{end}" before writing, as it cannot be part
# of the produced XMFA file.
if "start" in record.annotations and "end" in record.annotations:
suffix0 = "/%s-%s" % (str(record.annotations["start"]),
str(record.annotations["end"]))
suffix1 = "/%s-%s" % (str(record.annotations["start"] + 1),
str(record.annotations["end"]))
suffix0 = "/%s-%s" % (
str(record.annotations["start"]),
str(record.annotations["end"]),
)
suffix1 = "/%s-%s" % (
str(record.annotations["start"] + 1),
str(record.annotations["end"]),
)
if seq_name[-len(suffix0) :] == suffix0:
seq_name = seq_name[: -len(suffix0)]
if seq_name[-len(suffix1) :] == suffix1:
seq_name = seq_name[: -len(suffix1)]
if "start" in record.annotations \
and "end" in record.annotations \
and "strand" in record.annotations:
if (
"start" in record.annotations
and "end" in record.annotations
and "strand" in record.annotations
):
id_line = ID_LINE_FMT.format(
seq_name=seq_name, start=record.annotations["start"] + 1, end=record.annotations["end"],
strand=("+" if record.annotations["strand"] == 1 else "-"), file=record.name + ".fa",
ugly_hack=record.id
seq_name=seq_name,
start=record.annotations["start"] + 1,
end=record.annotations["end"],
strand=("+" if record.annotations["strand"] == 1 else "-"),
file=record.name + ".fa",
ugly_hack=record.id,
)
lacking_annotations = False
else:
id_line = ID_LINE_FMT.format(
seq_name=seq_name, start=0, end=0, strand="+",
file=record.name + ".fa", ugly_hack=record.id
seq_name=seq_name,
start=0,
end=0,
strand="+",
file=record.name + ".fa",
ugly_hack=record.id,
)
lacking_annotations = True
......@@ -190,8 +207,12 @@ class MauveWriter(SequentialAlignmentWriter):
# sequences, for the Mauve GUI
# http://darlinglab.org/mauve/user-guide/files.html#non-standard-xmfa-formatting-used-by-the-mauve-gui
id_line = ID_LINE_FMT.format(
seq_name=seq_name, start=0, end=0, strand="+",
file=record.name + ".fa", ugly_hack=record.id
seq_name=seq_name,
start=0,
end=0,
strand="+",
file=record.name + ".fa",
ugly_hack=record.id,
)
self.handle.write(id_line + "\n")
......@@ -284,14 +305,15 @@ class MauveIterator(AlignmentIterator):
alignment_length = max(map(len, list(seqs.values())))
records = []
for id in self._ids:
if id not in seqs or len(seqs[id]) == 0 \
or len(seqs[id]) == 0:
if id not in seqs or len(seqs[id]) == 0 or len(seqs[id]) == 0:
seq = "-" * alignment_length
else:
seq = seqs[id]
if alignment_length != len(seq):
raise ValueError("Sequences have different lengths, or repeated identifier")
raise ValueError(
"Sequences have different lengths, or repeated identifier"
)
# Sometimes we don't see a particular sequence in the
# alignment, so we skip that record since it isn't present in
......@@ -299,7 +321,7 @@ class MauveIterator(AlignmentIterator):
if id not in seq_regions:
continue
if (seq_regions[id]["start"] != 0 or seq_regions[id]["end"] != 0):
if seq_regions[id]["start"] != 0 or seq_regions[id]["end"] != 0:
suffix = "/{start}-{end}".format(**seq_regions[id])
if "realname" in seq_regions[id]:
corrected_id = seq_regions[id]["realname"]
......@@ -313,15 +335,13 @@ class MauveIterator(AlignmentIterator):
else:
corrected_id = seq_regions[id]["name"]
record = SeqRecord(
Seq(seq, self.alphabet),
id=corrected_id,
name=id
)
record = SeqRecord(Seq(seq, self.alphabet), id=corrected_id, name=id)
record.annotations["start"] = seq_regions[id]["start"]
record.annotations["end"] = seq_regions[id]["end"]
record.annotations["strand"] = 1 if seq_regions[id]["strand"] == "+" else -1
record.annotations["strand"] = (
1 if seq_regions[id]["strand"] == "+" else -1
)
records.append(record)
return MultipleSeqAlignment(records, self.alphabet)
......
......@@ -49,14 +49,16 @@ def NexusIterator(handle, seq_count=None):
assert len(n.unaltered_taxlabels) == len(n.taxlabels)
if seq_count and seq_count != len(n.unaltered_taxlabels):
raise ValueError("Found %i sequences, but seq_count=%i"
% (len(n.unaltered_taxlabels), seq_count))
raise ValueError(
"Found %i sequences, but seq_count=%i"
% (len(n.unaltered_taxlabels), seq_count)
)
# TODO - Can we extract any annotation too?
records = (SeqRecord(n.matrix[new_name], id=new_name,
name=old_name, description="")
for old_name, new_name
in zip(n.unaltered_taxlabels, n.taxlabels))
records = (
SeqRecord(n.matrix[new_name], id=new_name, name=old_name, description="")
for old_name, new_name in zip(n.unaltered_taxlabels, n.taxlabels)
)
# All done
yield MultipleSeqAlignment(records, n.alphabet)
......@@ -113,9 +115,10 @@ class NexusWriter(AlignmentWriter):
columns = alignment.get_alignment_length()
if columns == 0:
raise ValueError("Non-empty sequences are required")
minimal_record = "#NEXUS\nbegin data; dimensions ntax=0 nchar=0; " \
+ "format datatype=%s; end;" \
minimal_record = (
"#NEXUS\nbegin data; dimensions ntax=0 nchar=0; format datatype=%s; end;"
% self._classify_alphabet_for_nexus(alignment._alphabet)
)
n = Nexus.Nexus(minimal_record)
n.alphabet = alignment._alphabet
for record in alignment:
......@@ -123,7 +126,7 @@ class NexusWriter(AlignmentWriter):
# Note: MrBayes may choke on large alignments if not interleaved
if interleave is None:
interleave = (columns > 1000)
interleave = columns > 1000
n.write_nexus_data(self.handle, interleave=interleave)
def _classify_alphabet_for_nexus(self, alphabet):
......@@ -150,4 +153,5 @@ class NexusWriter(AlignmentWriter):
if __name__ == "__main__":
from Bio._utils import run_doctest
run_doctest(verbose=0)
......@@ -100,9 +100,10 @@ class PhylipWriter(SequentialAlignmentWriter):
"""
name = sanitize_name(record.id, id_width)
if name in names:
raise ValueError("Repeated name %r (originally %r), "
"possibly due to truncation"
% (name, record.id))
raise ValueError(
"Repeated name %r (originally %r), possibly due to truncation"
% (name, record.id)
)
names.append(name)
sequence = str(record.seq)
if "." in sequence:
......@@ -211,11 +212,14 @@ class PhylipIterator(AlignmentIterator):
assert self._is_header(line)
if self.records_per_alignment is not None and \
self.records_per_alignment != number_of_seqs:
raise ValueError("Found %i records in this alignment, "
"told to expect %i"
% (number_of_seqs, self.records_per_alignment))
if (
self.records_per_alignment is not None
and self.records_per_alignment != number_of_seqs
):
raise ValueError(
"Found %i records in this alignment, told to expect %i"
% (number_of_seqs, self.records_per_alignment)
)
ids = []
seqs = []
......@@ -258,9 +262,10 @@ class PhylipIterator(AlignmentIterator):
if not line:
break # end of file
records = (SeqRecord(Seq("".join(s), self.alphabet),
id=i, name=i, description=i)
for (i, s) in zip(ids, seqs))
records = (
SeqRecord(Seq("".join(s), self.alphabet), id=i, name=i, description=i)
for (i, s) in zip(ids, seqs)
)
return MultipleSeqAlignment(records, self.alphabet)
......@@ -273,8 +278,7 @@ class RelaxedPhylipWriter(PhylipWriter):
# Check inputs
for name in (s.id.strip() for s in alignment):
if any(c in name for c in string.whitespace):
raise ValueError("Whitespace not allowed in identifier: %s"
% name)
raise ValueError("Whitespace not allowed in identifier: %s" % name)
# Calculate a truncation length - maximum length of sequence ID plus a
# single character for padding
......@@ -326,9 +330,10 @@ class SequentialPhylipWriter(SequentialAlignmentWriter):
# else like an underscore "_" or pipe "|" character...
name = sanitize_name(record.id, id_width)
if name in names:
raise ValueError("Repeated name %r (originally %r), "
"possibly due to truncation"
% (name, record.id))
raise ValueError(
"Repeated name %r (originally %r), possibly due to truncation"
% (name, record.id)
)
names.append(name)
# From experimentation, the use of tabs is not understood by the
......@@ -387,11 +392,14 @@ class SequentialPhylipIterator(PhylipIterator):
assert self._is_header(line)
if self.records_per_alignment is not None and \
self.records_per_alignment != number_of_seqs:
raise ValueError("Found %i records in this alignment, "
"told to expect %i"
% (number_of_seqs, self.records_per_alignment))
if (
self.records_per_alignment is not None
and self.records_per_alignment != number_of_seqs
):
raise ValueError(
"Found %i records in this alignment, told to expect %i"
% (number_of_seqs, self.records_per_alignment)
)
ids = []
seqs = []
......@@ -411,9 +419,10 @@ class SequentialPhylipIterator(PhylipIterator):
continue
s = "".join([s, line.strip().replace(" ", "")])
if len(s) > length_of_seqs:
raise ValueError("Found a record of length %i, "
"should be %i"
% (len(s), length_of_seqs))
raise ValueError(
"Found a record of length %i, "
"should be %i" % (len(s), length_of_seqs)
)
if "." in s:
raise ValueError(_NO_DOTS)
seqs.append(s)
......@@ -426,9 +435,10 @@ class SequentialPhylipIterator(PhylipIterator):
self._header = line
break
records = (SeqRecord(Seq(s, self.alphabet),
id=i, name=i, description=i)
for (i, s) in zip(ids, seqs))
records = (
SeqRecord(Seq(s, self.alphabet), id=i, name=i, description=i)
for (i, s) in zip(ids, seqs)
)
return MultipleSeqAlignment(records, self.alphabet)
......
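The "Repeated name ... possibly due to truncation" errors above guard against strict PHYLIP's fixed-width identifiers, where two distinct ids can collide once truncated. A hedged sketch; this sanitize_name is a simplified stand-in for the module's helper, which also replaces characters PHYLIP forbids:

def sanitize_name(name, id_width=10):
    # Simplified stand-in: strip spaces, then truncate to the id width.
    return name.replace(" ", "_")[:id_width]

print(sanitize_name("Mycoplasma genitalium"))   # Mycoplasma
print(sanitize_name("Mycoplasma pneumoniae"))   # Mycoplasma  <- collision, hence the ValueError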
......@@ -184,20 +184,19 @@ class StockholmWriter(SequentialAlignmentWriter):
# These dictionaries should be kept in sync with those
# defined in the StockholmIterator class.
pfam_gr_mapping = {"secondary_structure": "SS",
pfam_gr_mapping = {
"secondary_structure": "SS",
"surface_accessibility": "SA",
"transmembrane": "TM",
"posterior_probability": "PP",
"ligand_binding": "LI",
"active_site": "AS",
"intron": "IN"}
"intron": "IN",
}
# These GC mappings are in addition to *_cons in GR mapping:
pfam_gc_mapping = {"reference_annotation": "RF",
"model_mask": "MM"}
pfam_gc_mapping = {"reference_annotation": "RF", "model_mask": "MM"}
# Following dictionary deliberately does not cover AC, DE or DR
pfam_gs_mapping = {"organism": "OS",
"organism_classification": "OC",
"look": "LO"}
pfam_gs_mapping = {"organism": "OS", "organism_classification": "OC", "look": "LO"}
def write_alignment(self, alignment):
"""Use this to write (another) single alignment to an open file.
......@@ -226,7 +225,9 @@ class StockholmWriter(SequentialAlignmentWriter):
if k in self.pfam_gc_mapping:
self.handle.write("#=GC %s %s\n" % (self.pfam_gc_mapping[k], v))
elif k in self.pfam_gr_mapping:
self.handle.write("#=GC %s %s\n" % (self.pfam_gr_mapping[k] + "_cons", v))
self.handle.write(
"#=GC %s %s\n" % (self.pfam_gr_mapping[k] + "_cons", v)
)
else:
# It doesn't follow the PFAM standards, but should we record
# this data anyway?
......@@ -248,15 +249,17 @@ class StockholmWriter(SequentialAlignmentWriter):
# In the Stockholm file format, spaces are not allowed in the id
seq_name = seq_name.replace(" ", "_")
if "start" in record.annotations \
and "end" in record.annotations:
suffix = "/%s-%s" % (str(record.annotations["start"]),
str(record.annotations["end"]))
if "start" in record.annotations and "end" in record.annotations:
suffix = "/%s-%s" % (
str(record.annotations["start"]),
str(record.annotations["end"]),
)
if seq_name[-len(suffix) :] != suffix:
seq_name = "%s/%s-%s" % (
seq_name,
str(record.annotations["start"]),
str(record.annotations["end"]))
str(record.annotations["end"]),
)
if seq_name in self._ids_written:
raise ValueError("Duplicate record identifier: %s" % seq_name)
......@@ -276,31 +279,32 @@ class StockholmWriter(SequentialAlignmentWriter):
# AC = Accession
if "accession" in record.annotations:
self.handle.write("#=GS %s AC %s\n" % (
seq_name, self.clean(record.annotations["accession"])))
self.handle.write(
"#=GS %s AC %s\n"
% (seq_name, self.clean(record.annotations["accession"]))
)
elif record.id:
self.handle.write("#=GS %s AC %s\n" % (
seq_name, self.clean(record.id)))
self.handle.write("#=GS %s AC %s\n" % (seq_name, self.clean(record.id)))
# DE = description
if record.description:
self.handle.write("#=GS %s DE %s\n" % (
seq_name, self.clean(record.description)))
self.handle.write(
"#=GS %s DE %s\n" % (seq_name, self.clean(record.description))
)
# DE = database links
for xref in record.dbxrefs:
self.handle.write("#=GS %s DR %s\n" % (
seq_name, self.clean(xref)))
self.handle.write("#=GS %s DR %s\n" % (seq_name, self.clean(xref)))
# GS = other per sequence annotation
for key, value in record.annotations.items():
if key in self.pfam_gs_mapping:
data = self.clean(str(value))
if data:
self.handle.write("#=GS %s %s %s\n"
% (seq_name,
self.clean(self.pfam_gs_mapping[key]),
data))
self.handle.write(
"#=GS %s %s %s\n"
% (seq_name, self.clean(self.pfam_gs_mapping[key]), data)
)
else:
# It doesn't follow the PFAM standards, but should we record
# this data anyway?
......@@ -311,10 +315,10 @@ class StockholmWriter(SequentialAlignmentWriter):
if key in self.pfam_gr_mapping and len(str(value)) == len(record.seq):
data = self.clean(str(value))
if data:
self.handle.write("#=GR %s %s %s\n"
% (seq_name,
self.clean(self.pfam_gr_mapping[key]),
data))
self.handle.write(
"#=GR %s %s %s\n"
% (seq_name, self.clean(self.pfam_gr_mapping[key]), data)
)
else:
# It doesn't follow the PFAM standards, but should we record
# this data anyway?
......@@ -354,20 +358,19 @@ class StockholmIterator(AlignmentIterator):
# These dictionaries should be kept in sync with those
# defined in the PfamStockholmWriter class.
pfam_gr_mapping = {"SS": "secondary_structure",
pfam_gr_mapping = {
"SS": "secondary_structure",
"SA": "surface_accessibility",
"TM": "transmembrane",
"PP": "posterior_probability",
"LI": "ligand_binding",
"AS": "active_site",
"IN": "intron"}
"IN": "intron",
}
# These GC mappings are in addition to *_cons in GR mapping:
pfam_gc_mapping = {"RF": "reference_annotation",
"MM": "model_mask"}
pfam_gc_mapping = {"RF": "reference_annotation", "MM": "model_mask"}
# Following dictionary deliberately does not cover AC, DE or DR
pfam_gs_mapping = {"OS": "organism",
"OC": "organism_classification",
"LO": "look"}
pfam_gs_mapping = {"OS": "organism", "OC": "organism_classification", "LO": "look"}
_header = None # for caching lines between __next__ calls
......@@ -424,8 +427,8 @@ class StockholmIterator(AlignmentIterator):
if len(parts) != 2:
# This might be someone attempting to store a zero length sequence?
raise ValueError(
"Could not split line into identifier "
"and sequence:\n" + line)
"Could not split line into identifier and sequence:\n" + line
)
seq_id, seq = parts
if seq_id not in ids:
ids[seq_id] = True
......@@ -488,21 +491,31 @@ class StockholmIterator(AlignmentIterator):
if ids and seqs:
if self.records_per_alignment is not None \
and self.records_per_alignment != len(ids):
raise ValueError("Found %i records in this alignment, told to expect %i"
% (len(ids), self.records_per_alignment))
if (
self.records_per_alignment is not None
and self.records_per_alignment != len(ids)
):
raise ValueError(
"Found %i records in this alignment, told to expect %i"
% (len(ids), self.records_per_alignment)
)
alignment_length = len(list(seqs.values())[0])
records = [] # Alignment obj will put them all in a list anyway
for seq_id in ids:
seq = seqs[seq_id]
if alignment_length != len(seq):
raise ValueError("Sequences have different lengths, or repeated identifier")
raise ValueError(
"Sequences have different lengths, or repeated identifier"
)
name, start, end = self._identifier_split(seq_id)
record = SeqRecord(Seq(seq, self.alphabet),
id=seq_id, name=name, description=seq_id,
annotations={"accession": name})
record = SeqRecord(
Seq(seq, self.alphabet),
id=seq_id,
name=name,
description=seq_id,
annotations={"accession": name},
)
# Accession will be overridden by _populate_meta_data if an explicit
# accession is provided:
record.annotations["accession"] = name
......@@ -516,8 +529,9 @@ class StockholmIterator(AlignmentIterator):
records.append(record)
for k, v in gc.items():
if len(v) != alignment_length:
raise ValueError("%s length %i, expected %i"
% (k, len(v), alignment_length))
raise ValueError(
"%s length %i, expected %i" % (k, len(v), alignment_length)
)
alignment = MultipleSeqAlignment(records, self.alphabet)
for k, v in sorted(gc.items()):
......@@ -601,7 +615,9 @@ class StockholmIterator(AlignmentIterator):
# Should we try and parse the strings?
record.dbxrefs = seq_data[feature]
elif feature in self.pfam_gs_mapping:
record.annotations[self.pfam_gs_mapping[feature]] = ", ".join(seq_data[feature])
record.annotations[self.pfam_gs_mapping[feature]] = ", ".join(
seq_data[feature]
)
else:
# Ignore it?
record.annotations["GS:" + feature] = ", ".join(seq_data[feature])
......@@ -611,7 +627,9 @@ class StockholmIterator(AlignmentIterator):
for feature in seq_col_data:
# Note this dictionary contains strings!
if feature in self.pfam_gr_mapping:
record.letter_annotations[self.pfam_gr_mapping[feature]] = seq_col_data[feature]
record.letter_annotations[self.pfam_gr_mapping[feature]] = seq_col_data[
feature
]
else:
# Ignore it?
record.letter_annotations["GR:" + feature] = seq_col_data[feature]
......@@ -619,4 +637,5 @@ class StockholmIterator(AlignmentIterator):
if __name__ == "__main__":
from Bio._utils import run_doctest
run_doctest()
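The Stockholm writer above appends a /start-end suffix to each sequence name unless the name already carries it. The convention in isolation, with hypothetical values:

annotations = {"start": 1, "end": 73}
seq_name = "Q9XXX0.1"
suffix = "/%s-%s" % (annotations["start"], annotations["end"])
if not seq_name.endswith(suffix):
    seq_name += suffix
print(seq_name)  # Q9XXX0.1/1-73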
......@@ -228,20 +228,25 @@ def write(alignments, handle, format):
count = 0
for alignment in alignments:
if not isinstance(alignment, MultipleSeqAlignment):
raise TypeError("Expect a list or iterator of MultipleSeqAlignment "
"objects, got: %r" % alignment)
raise TypeError(
"Expect a list or iterator of MultipleSeqAlignment "
"objects, got: %r" % alignment
)
SeqIO.write(alignment, fp, format)
count += 1
elif format in _FormatToIterator or format in SeqIO._FormatToIterator:
raise ValueError("Reading format '%s' is supported, but not writing"
% format)
raise ValueError(
"Reading format '%s' is supported, but not writing" % format
)
else:
raise ValueError("Unknown format '%s'" % format)
if not isinstance(count, int):
raise RuntimeError("Internal error - the underlying %s "
raise RuntimeError(
"Internal error - the underlying %s "
"writer should have returned the alignment count, not %s"
% (format, repr(count)))
% (format, repr(count))
)
return count
......@@ -292,17 +297,21 @@ def _force_alphabet(alignment_iterator, alphabet):
# Assume the alphabet argument has been pre-validated
given_base_class = _get_base_alphabet(alphabet).__class__
for align in alignment_iterator:
if not isinstance(_get_base_alphabet(align._alphabet),
given_base_class):
raise ValueError("Specified alphabet %s clashes with "
if not isinstance(_get_base_alphabet(align._alphabet), given_base_class):
raise ValueError(
"Specified alphabet %s clashes with "
"that determined from the file, %s"
% (repr(alphabet), repr(align._alphabet)))
% (repr(alphabet), repr(align._alphabet))
)
for record in align:
if not isinstance(_get_base_alphabet(record.seq.alphabet),
given_base_class):
raise ValueError("Specified alphabet %s clashes with "
if not isinstance(
_get_base_alphabet(record.seq.alphabet), given_base_class
):
raise ValueError(
"Specified alphabet %s clashes with "
"that determined from the file, %s"
% (repr(alphabet), repr(record.seq.alphabet)))
% (repr(alphabet), repr(record.seq.alphabet))
)
record.seq.alphabet = alphabet
align._alphabet = alphabet
yield align
......@@ -351,8 +360,9 @@ def parse(handle, format, seq_count=None, alphabet=None):
raise ValueError("Format required (lower case string)")
if format != format.lower():
raise ValueError("Format string '%s' should be lower case" % format)
if alphabet is not None and not (isinstance(alphabet, Alphabet) or
isinstance(alphabet, AlphabetEncoder)):
if alphabet is not None and not (
isinstance(alphabet, Alphabet) or isinstance(alphabet, AlphabetEncoder)
):
raise ValueError("Invalid alphabet, %s" % repr(alphabet))
if seq_count is not None and not isinstance(seq_count, int):
raise TypeError("Need integer for seq_count (sequences per alignment)")
......@@ -369,14 +379,13 @@ def parse(handle, format, seq_count=None, alphabet=None):
i = iterator_generator(fp, seq_count, alphabet=alphabet)
except TypeError:
# It isn't supported.
i = _force_alphabet(iterator_generator(fp, seq_count),
alphabet)
i = _force_alphabet(iterator_generator(fp, seq_count), alphabet)
elif format in SeqIO._FormatToIterator:
# Exploit the existing SeqIO parser to do the dirty work!
i = _SeqIO_to_alignment_iterator(fp, format,
alphabet=alphabet,
seq_count=seq_count)
i = _SeqIO_to_alignment_iterator(
fp, format, alphabet=alphabet, seq_count=seq_count
)
else:
raise ValueError("Unknown format '%s'" % format)
......@@ -448,7 +457,10 @@ def read(handle, format, seq_count=None, alphabet=None):
raise ValueError("More than one record found in handle")
if seq_count:
if len(first) != seq_count:
raise RuntimeError("More sequences found in alignment than specified in seq_count: %s." % seq_count)
raise RuntimeError(
"More sequences found in alignment than specified in seq_count: %s."
% seq_count
)
return first
......@@ -482,4 +494,5 @@ def convert(in_file, in_format, out_file, out_format, alphabet=None):
if __name__ == "__main__":
from Bio._utils import run_doctest
run_doctest()
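The functions reworked above are the public Bio.AlignIO entry points. For orientation, typical usage looks like this; the file name is hypothetical:

from Bio import AlignIO

for alignment in AlignIO.parse("example.aln", "clustal"):
    print("Alignment of %i records, length %i"
          % (len(alignment), alignment.get_alignment_length()))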
......@@ -247,12 +247,16 @@ class AbstractCommandline(object):
doc = p.description
if isinstance(p, _Switch):
doc += "\n\nThis property controls the addition of the %s " \
doc += (
"\n\nThis property controls the addition of the %s "
"switch, treat this property as a boolean." % p.names[0]
)
else:
doc += "\n\nThis controls the addition of the %s parameter " \
"and its associated value. Set this property to the " \
doc += (
"\n\nThis controls the addition of the %s parameter "
"and its associated value. Set this property to the "
"argument value required." % p.names[0]
)
prop = property(getter(name), setter(name), deleter(name), doc)
setattr(self.__class__, name, prop) # magic!
for key, value in kwargs.items():
......
......@@ -639,10 +639,14 @@ class NcbiblastxCommandline(_NcbiblastMain2SeqCommandline):
"""Use composition-based statistics for blastp, blastx, or tblastn:
D or d: default (equivalent to 2 )
0 or F or f: no composition-based statistics
1: Composition-based statistics as in NAR 29:2994-3005, 2001
2 or T or t : Composition-based score adjustment as in
Bioinformatics 21:902-911, 2005, conditioned on sequence properties
3: Composition-based score adjustment as in
Bioinformatics 21:902-911, 2005, unconditionally
......@@ -715,12 +719,15 @@ class NcbitblastnCommandline(_NcbiblastMain2SeqCommandline):
"Minimum score for words to be added to the BLAST lookup table (float).",
equate=False),
_Option(["-comp_based_stats", "comp_based_stats"],
"""Use composition-based statistics (string, default 2, i.e. True).
r"""Use composition-based statistics (string, default 2, i.e. True).
0, F or f: no composition-based statistics
1: Composition-based statistics as in NAR 29:2994-3005, 2001
2, T or t, D or d : Composition-based score adjustment as in
Bioinformatics 21:902-911, 2005, conditioned on sequence properties
3: Composition-based score adjustment as in Bioinformatics 21:902-911,
2005, unconditionally
......@@ -732,7 +739,9 @@ class NcbitblastnCommandline(_NcbiblastMain2SeqCommandline):
"""Filter query sequence with SEG (string).
Format: "yes", "window locut hicut", or "no" to disable.
Default is "12 2.2 2.5""",
Default is "12 2.2 2.5"
""",
equate=False),
# Extension options:
_Switch(["-ungapped", "ungapped"],
......@@ -805,7 +814,8 @@ class NcbitblastxCommandline(_NcbiblastMain2SeqCommandline):
"""Filter query sequence with SEG (string).
Format: "yes", "window locut hicut", or "no" to disable.
Default is "12 2.2 2.5""",
Default is "12 2.2 2.5"
""",
equate=False),
]
_NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
......@@ -968,14 +978,15 @@ class NcbirpsblastCommandline(_NcbiblastCommandline):
self.parameters = [
# Query filtering options:
_Option(["-seg", "seg"],
"""Filter query sequence with SEG (string).
r"""Filter query sequence with SEG (string).
Format: "yes", "window locut hicut", or "no" to disable.
Default is "12 2.2 2.5""",
Default is "12 2.2 2.5"
""",
equate=False),
# Restrict search or results:
_Option(["-culling_limit", "culling_limit"],
"""Hit culling limit (integer).
r"""Hit culling limit (integer).
If the query range of a hit is enveloped by that of at
least this many higher-scoring hits, delete the hit.
......@@ -984,14 +995,14 @@ class NcbirpsblastCommandline(_NcbiblastCommandline):
""",
equate=False),
_Option(["-best_hit_overhang", "best_hit_overhang"],
"""Best Hit algorithm overhang value (recommended value: 0.1)
r"""Best Hit algorithm overhang value (recommended value: 0.1)
Float between 0.0 and 0.5 inclusive.
Incompatible with: culling_limit.""",
equate=False),
_Option(["-best_hit_score_edge", "best_hit_score_edge"],
"""Best Hit algorithm score edge value (recommended value: 0.1)
r"""Best Hit algorithm score edge value (recommended value: 0.1)
Float between 0.0 and 0.5 inclusive.
......@@ -999,11 +1010,13 @@ class NcbirpsblastCommandline(_NcbiblastCommandline):
equate=False),
# General search options:
_Option(["-comp_based_stats", "comp_based_stats"],
"""Use composition-based statistics.
r"""Use composition-based statistics.
D or d: default (equivalent to 0 )
0 or F or f: Simplified Composition-based statistics as in
Bioinformatics 15:1000-1011, 1999
1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001
Default = 0.
......@@ -1012,7 +1025,7 @@ class NcbirpsblastCommandline(_NcbiblastCommandline):
equate=False),
# Misc options:
_Switch(["-use_sw_tback", "use_sw_tback"],
"Compute locally optimal Smith-Waterman alignments?"),
r"Compute locally optimal Smith-Waterman alignments?"),
]
_NcbiblastCommandline.__init__(self, cmd, **kwargs)
......@@ -1049,7 +1062,7 @@ class NcbirpstblastnCommandline(_NcbiblastCommandline):
self.parameters = [
# Input query options:
_Option(["-strand", "strand"],
"""Query strand(s) to search against database/subject.
r"""Query strand(s) to search against database/subject.
Values allowed are "both" (default), "minus", "plus".""",
checker_function=lambda value: value in ["both",
......@@ -1058,22 +1071,24 @@ class NcbirpstblastnCommandline(_NcbiblastCommandline):
equate=False),
# Input query options:
_Option(["-query_gencode", "query_gencode"],
"Genetic code to use to translate query (integer, default 1).",
r"Genetic code to use to translate query (integer, default 1).",
equate=False),
# Query filtering options:
_Option(["-seg", "seg"],
"""Filter query sequence with SEG (string).
r"""Filter query sequence with SEG (string).
Format: "yes", "window locut hicut", or "no" to disable.
Default is "12 2.2 2.5""",
Default is "12 2.2 2.5" """,
equate=False),
# General search options:
_Option(["-comp_based_stats", "comp_based_stats"],
"""Use composition-based statistics.
r"""Use composition-based statistics.
D or d: default (equivalent to 0 )
0 or F or f: Simplified Composition-based statistics as in
Bioinformatics 15:1000-1011, 1999
1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001
Default = 0.
......@@ -1082,10 +1097,10 @@ class NcbirpstblastnCommandline(_NcbiblastCommandline):
equate=False),
# Extension options:
_Switch(["-ungapped", "ungapped"],
"Perform ungapped alignment only?"),
r"Perform ungapped alignment only?"),
# Miscellaneous options:
_Switch(["-use_sw_tback", "use_sw_tback"],
"Compute locally optimal Smith-Waterman alignments?"),
r"Compute locally optimal Smith-Waterman alignments?"),
]
_NcbiblastCommandline.__init__(self, cmd, **kwargs)
......@@ -1174,6 +1189,7 @@ class NcbideltablastCommandline(_Ncbiblast2SeqCommandline):
"""Use composition-based statistics (string, default 2, i.e. True).
0, F or f: no composition-based statistics.
2, T or t, D or d : Composition-based score adjustment as in
Bioinformatics 21:902-911, 2005, conditioned on sequence properties
......
......@@ -27,7 +27,7 @@ class _XMLparser(ContentHandler):
"""
def __init__(self, debug=0):
"""Constructor.
"""Initialize the parser.
Arguments:
- debug - integer, amount of debug information to print
......@@ -139,7 +139,7 @@ class BlastParser(_XMLparser):
"""
def __init__(self, debug=0):
"""Constructor.
"""Initialize the parser.
Arguments:
- debug - integer, amount of debug information to print
......
......@@ -1438,16 +1438,17 @@ static char kmedoids__doc__[] =
"\n"
" Examples are:\n"
"\n"
" >>> from numpy import array\n"
" >>> distance = array([[0.0, 1.1, 2.3],\n"
" ... [1.1, 0.0, 4.5],\n"
" ... [2.3, 4.5, 0.0]])\n"
" (option #1)\n"
" >>> # (option #1)\n"
" >>> distance = array([1.1, 2.3, 4.5])\n"
" (option #2)\n"
" >>> # (option #2)\n"
" >>> distance = [array([]),\n"
" ... array([1.1]),\n"
" ... array([2.3, 4.5])]\n"
" (option #3)\n"
" >>> # (option #3)\n"
"\n"
" These three correspond to the same distance matrix.\n"
"\n"
......@@ -1596,16 +1597,17 @@ static char treecluster__doc__[] =
"\n"
" Examples are:\n"
"\n"
" >>> from numpy import array\n"
" >>> distance = array([[0.0, 1.1, 2.3],\n"
" ... [1.1, 0.0, 4.5],\n"
" ... [2.3, 4.5, 0.0]])\n"
" # option 1.\n"
" >>> # option 1.\n"
" >>> distance = array([1.1, 2.3, 4.5])\n"
" # option 2.\n"
" >>> # option 2.\n"
" >>> distance = [array([]),\n"
" ... array([1.1]),\n"
" ... array([2.3, 4.5])]\n"
" # option 3.\n"
" >>> # option 3.\n"
"\n"
" These three correspond to the same distance matrix.\n"
"\n"
......@@ -2318,7 +2320,7 @@ static char pca__doc__[] =
"Adding the column means to the dot product of the coordinates and the\n"
"principal components, i.e.\n"
"\n"
" >>> columnmean + dot(coordinates, pc)\n"
" columnmean + dot(coordinates, pc)\n"
"\n"
"recreates the data matrix.\n";
......
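The docstring edits in the C module above are doctest repairs: doctest treats any non-blank line directly after a >>> statement as that statement's expected output, so a bare annotation like "(option #1)" would be compared against empty output and fail, while a ">>> #" comment line executes silently; the pca docstring goes the other way, dropping the >>> prompt from a line that was never meant to run. In miniature:

def distance_options():
    """Illustrate the doctest rule the hunks above work around.

    >>> distance = [1.1, 2.3, 4.5]
    >>> # option 2: a comment executes and prints nothing, so nothing is checked
    >>> distance[0]
    1.1
    """

if __name__ == "__main__":
    import doctest
    doctest.testmod()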