Skip to content
Commits on Source (12)
......@@ -40,8 +40,14 @@ build_script:
- SET PATH=C:\Py;C:\Py\Scripts;C:\Py\Library\bin;%PATH%
- conda config --set always_yes yes
- conda update conda
- conda install setuptools numpy mysql-connector-python psycopg2 matplotlib networkx reportlab scipy coverage
- if "PY_MAJOR_VER"=="2" conda install unittest2
# Pinning pillow==5.4.1 as version 6 breaks on current latest reportlab
# https://bitbucket.org/rptlab/reportlab/issues/176/incompatibility-with-pillow-600
- conda install setuptools numpy psycopg2 matplotlib networkx reportlab scipy coverage pillow==5.4.1
# Pinning mysql-connector-python==8.0.13 for Python 3 as 8.0.16 breaks our tests
# https://github.com/biopython/biopython/issues/2120
# We don't install mysql-connector-python for Python 2
- if "%PY_MAJOR_VER%"=="3" conda install mysql-connector-python==8.0.13
- python setup.py build
test_script:
......
# =================================================
# flake8:
# pycodestyle: E### (error), W### (warning)
# pyflake: F### (error)
# pydocstyle: D1## - Missing Docstrings
# D2## - Whitespace Issues
# D4## - Docstring Content issues
# flake8-bugbear: B###
# flake8-quotes: Q###
# flake8-commas: C#### (in case installed locally)
# flake8-black : BLK### (in case installed locally)
# flake8-pie : PIE### (in case installed locally)
# =================================================
[flake8]
doctests = True
# Exclude some file types and folders that shouldn't be checked:
exclude = .svn,CVS,.bzr,.hg,.git,__pycache__,.tox,.github,build,
ignore =
# =============================================================
# Biopython's 'standard' ignores we can agree to always accept:
# =============================================================
D203, # 1 blank line required before class docstring
# deliberately ignore in favour of passing D211: No blank lines
# allowed before class docstring
W503, # line-break before binary operator
# deliberately ignore (in favour of some day enforcing W504?)
# ===========================================
# Ignores that we have to accept for a while:
# ===========================================
E123, # closing bracket does not match indentation of opening bracket's line
# TODO? (main/Bio/Tests: 3/91/31 occurrences)
E203, # whitespace before ':'
# gives false positives after running black, see
# https://github.com/PyCQA/pycodestyle/issues/373
E501, # line too long
# Maybe we find a sensible limit, e.g. 88 (black) and enforce it
W504, # line break after binary operator (Bio/Tests/Scripts: 225/119/7) TODO?
B007, # Loop control variable not used within the loop body.
# If this is intended, start the name with an underscore
# =========================================
# Optional ignores for local installations:
# =========================================
BLK100, # Black would make changes, only on local installations (so far)
PIE781, # Assigning to temp variable and then returning, not enforcing
# ========================
# Folder specific ignores:
# ========================
per-file-ignores =
Bio/*:E122,E126,F401,F841,D105,B009,B010,B011,C812,C815
Tests/*:F401,F841,D101,D102,D103,B009,B010,B011,C812
# Due to a bug in flake8, we need the following lines for running the
# pre-commit hook. If you made edits above, please change also here!
/Bio/*:E122,E126,F401,F841,D105,B009,B010,B011,C812,C815
/Tests/*:F401,F841,D101,D102,D103,B009,B010,B011,C812
# =============================
# per-file-ignores error codes:
# =============================
#Bio/*:E122 continuation line missing indentation or outdented TODO? (264 occurrences)
# E126 continuation line over-indented for hanging indent TODO? (54 occurrences)
# F401 module imported but unused TODO? (107 occurrences)
# F841 local variable is assigned to but never used TODO? (55 occurrences)
# D105 missing docstring magic method (121 occurrences)
# B009 do not call getattr with a constant attribute value,
# it is not any safer than normal property access
# B010 do not call setattr with a constant attribute value,
# it is not any safer than normal property access
# B011 do not call assert False since python -O removes these calls;
# instead callers should raise AssertionError().
# C812 missing trailing comma
# C815 missing trailing comma in Python 3.5+
#Tests/*:F401 module imported but unused TODO? (88 occurrences)
# F841 local variable is assigned to but never used TODO? (64 occurrences)
# D101 missing docstring in public class (207 occurrences)
# D102 missing docstring in public method (956 occurrences)
# D103 missing docstring in public functions (52 occurrences)
# B009 do not call getattr with a constant attribute value,
# it is not any safer than normal property access
# B010 do not call setattr with a constant attribute value,
# it is not any safer than normal property access
# B011 do not call assert False since python -O removes these calls;
# instead callers should raise AssertionError()
# C812 missing trailing comma
# =======================
# flake8-quotes settings:
# =======================
inline-quotes = double
......@@ -21,3 +21,7 @@ Tests/SubsMat/acc_rep_mat.pik binary
# MMTF is a binary file format,
Tests/PDB/4CUP.mmtf binary
# UCSC Nib files are binary:
Tests/Nib/test_bigendian.nib binary
Tests/Nib/test_littleendian.nib binary
......@@ -19,6 +19,7 @@
Bio/Alphabet @peterjc
Bio/Align/* @peterjc
Bio/Align/_aligners.c @mdehoon
Bio/AlignIO/* @peterjc
Tests/test_AlignIO* @peterjc
......
......@@ -6,9 +6,9 @@ This pull request addresses issue #...
- [ ] I hereby agree to dual licence this and any previous contributions under both
the _Biopython License Agreement_ **AND** the _BSD 3-Clause License_.
- [ ] I have read the ``CONTRIBUTING.rst`` file and understand that AppVeyor and
TravisCI will be used to confirm the Biopython unit tests and ``flake8`` style
checks pass with these changes.
- [ ] I have read the ``CONTRIBUTING.rst`` file, have run ``flake8`` locally, and
understand that AppVeyor and TravisCI will be used to confirm the Biopython unit
tests and style checks pass with these changes.
- [ ] I have added my name to the alphabetical contributors listings in the files
``NEWS.rst`` and ``CONTRIB.rst`` as part of this pull request, am listed
......
#!/bin/bash
# Assumes being called from the Biopython repository's root folder,
# (i.e. a clone of https://github.com/biopython/biopython) as part
# of our continuous integration testing to save the compiled docs
# to https://github.com/biopython/docs
#
# In order to have write permissions, we put a private key into the
# TravisCI settings as a secure environment variable, and put the
# matching public key into the GitHub documentation repository's
# settings as a deploy key with write permissions.
#
# Key creation,
#
# $ ssh-keygen -t rsa -b 4096 -C "biopython documentation deploy key" -f biopython_doc_key -N ""
# Generating public/private rsa key pair.
# Your identification has been saved in biopython_doc_key.
# Your public key has been saved in biopython_doc_key.pub.
# The key fingerprint is:
# SHA256:nFfhbwryDLDz8eDEHa4sjdH0gOgwyXGGDUBGfDi5luQ biopython documentation deploy key
# The key's randomart image is:
# +---[RSA 4096]----+
# |===+o . |
# |.B.*.. . . . |
# |o X . o o . o |
# | E + B * o . |
# |. . + S * o |
# | X @ . o |
# | o * + . |
# | . |
# | |
# +----[SHA256]-----+
#
# Next, we add the public key to https://github.com/biopython/docs as
# a deployment key with write permission,
#
# $ cat biopython_doc_key.pub
# ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDpQ3I6ZpL9cqUpqkHMPALTQg9ya3sL1MVXYjbTnuWnDoRml5UYXVgD8hgOJxwaxDo1BV+fKn68LXPEwlZ5FC6eRSCJz20SvWPkMDhAwChJJ+nE7f/vvK18R3Ge9ksWra8LFSR3EL7joQTN+c1VyaJH22qj1OED3G68Ix+bgvnUpZgeurV8vDV06FVx7H1Q5a5MoTWFdMa9wzJn5o6m7khditOTDKqznFULoOONpw7CsTiJD6drQPk1pwftDxEBMEAG7cKwux/dRWJtzsRQ7IO0d/AhzsqnLJJIgkHzQwmvpGpffWfoomNwF4bWJuWzu6tRcGcX16fLMyGK8kFJaL1zY6gQFkAbfsIdA2G28S79mIC4jT1JtiNYBOV9wIjxyZUyvzSeQGVC7yBafHE5eEb267dgGnDl654XzyIImLSKv/nx8No16UK/e5F+ds3hp0DPTknzeVOGBUEt1k8pEp47J9JVKoeceph0cJbfzFNv9pgOgyaHb1mhs9pI4kIQ3R+ibeAZbPWT709n26Y99Q2MSSZyPuZvX8VBA1NfoENmuTrEn/qqGlvZez3Blh4MIvYg24DFv/rHN92Edk5S7xY0eB7E6D6X/N80ThuBSqxlJpxSQlA+LICcq/EPd37/WT7exiheXysN5oIOvwNgUNNFftDWv2gPBu2bf/foHfAQKQ== biopython documentation deploy key
#
# Finally, we add the private key to TravisCI by going to
# https://travis-ci.org/biopython/biopython/settings or any authorised
# fork like https://travis-ci.org/peterjc/biopython/settings and
# setting DOC_KEY to the following (secret) value:
#
# $ python -c "print(open('biopython_doc_key').read().strip().replace(' ', r'\ ').replace('\n', r'\\\n'))"
# ...
#
# TravisCI requires we escape spaces as '\ ' and newlines as '\\n', and
# we explicitly strip the trailing new line so that we don't get an extra
# one when rebuilding the key later.
#
# Make sure "DISPLAY VALUE IN BUILD LOG" is off (the default).
#
# For testing locally, set local environment $DOC_KEY to this value.
# Thereafter, when ever this script gets run on TravisCI it should
# be able to deploy the HTML documentation to our documentation
# repository (which will display on biopython.org via GitHub pages).
# Abort immediately on any command failure (the bare "false" calls
# below rely on this to act as explicit error exits after the echos):
set -e
if [ -z "$DOC_KEY" ]; then
echo "Missing (secret) environment variable DOC_KEY,"
echo "which should hold the private SSH deployment key."
false
fi
# Now that we know $DOC_KEY is set, it is safe to also treat unset
# variables and pipeline failures as errors for the rest of the script:
set -euo pipefail
DEST_SLUG=biopython/docs
# Could look at $TRAVIS_TAG, e.g. DEST_DIR=${TRAVIS_TAG:-dev}
# However, tags lack the dots in the version number. Since
# Biopython was installed to run Sphinx and build the docs,
# can use this:
DEST_DIR=`python -c "import Bio; v=Bio.__version__; print('dev' if 'dev' in v else v)"`
SOURCE_DIR=${TRAVIS_BUILD_DIR:-$PWD}/Doc/api/_build/html
WORKING_DIR=/tmp/deploy_biopython_docs
if [ -z "$DEST_DIR" ]; then
echo "ERROR: Failed to get Biopython version, is it not installed?"
# Re-run the import un-captured so its traceback appears in the log:
python -c "import Bio; print(Bio.__version__)"
false
fi
DEST_DIR=$DEST_DIR/api
echo "Aiming to deploy $SOURCE_DIR to $DEST_SLUG branch gh-pages as $DEST_DIR"
# On TravisCI, must create the variable using '\ ' and '\n', so
# here we must unescape the whitespace to recover the SSH deploy key:
python -c "import os; print(os.environ['DOC_KEY'].strip().replace(r'\ ', ' ').replace(r'\n', '\n'))" > $HOME/.biopython_doc_deploy.key
# Check we have a sane looking line structure: a PEM private key has
# exactly two lines starting with dashes (the BEGIN and END markers).
if [ `grep -c "^\-\-\-\-\-" $HOME/.biopython_doc_deploy.key` -ne 2 ]; then
echo "ERROR: Failed to rebuild the SSH key,"
# Deliberately only report line count and checksum, never the key itself:
wc -l $HOME/.biopython_doc_deploy.key
md5sum $HOME/.biopython_doc_deploy.key
false
fi
# ssh refuses keys readable by other users, so restrict permissions:
chmod 600 $HOME/.biopython_doc_deploy.key
# GIT_SSH points git at our wrapper script which supplies the deploy key:
export GIT_SSH=${TRAVIS_BUILD_DIR:-$PWD}/.github/ssh_via_deploy_key.sh
if ! [[ -f "$GIT_SSH" ]]; then
echo "Error, set GIT_SSH="$GIT_SSH" but does not exist"
false
elif ! [[ -x "$GIT_SSH" ]]; then
echo "Error, set GIT_SSH="$GIT_SSH" but not executable"
false
fi;
echo "Setting up clone of $DEST_SLUG locally at $WORKING_DIR"
# Clone the destination under /tmp (public URL, no key needed)
rm -rf $WORKING_DIR
git clone https://github.com/$DEST_SLUG.git $WORKING_DIR
pushd $WORKING_DIR
git checkout gh-pages
# Switch the git protocol to SSH based so we can use our key
git remote set-url origin --push git@github.com:$DEST_SLUG.git
popd
echo "Copying $SOURCE_DIR/* to $WORKING_DIR/$DEST_DIR/ next"
if [ ! -d $SOURCE_DIR ]; then
echo "ERROR: Directory $SOURCE_DIR/ does not exist."
false
fi
# Remove any old files
pushd $WORKING_DIR
if [ -d $DEST_DIR ]; then
echo "Removing old files"
git rm -r $DEST_DIR/
fi
mkdir -p $DEST_DIR
echo "Copying files"
cp -R $SOURCE_DIR/* $DEST_DIR/
echo "Staging files in git"
git add $DEST_DIR/
# Only commit and push if the staged tree actually differs, so that
# re-runs on unchanged docs do not create empty commits:
if [[ -z $(git status --porcelain) ]]; then
echo "Nothing has changed, nothing needs pushing."
else
echo "Making commit of new files"
git commit -m "Automated update ${TRAVIS_COMMIT:-}" --author "TravisCI <travisci@example.org>"
echo "Finally, pushing to $DEST_SLUG gh-pages branch"
git push origin gh-pages
echo "Documentation deployed!"
fi
popd
#!/bin/bash
# Wrapper used as $GIT_SSH so that git's SSH transport authenticates
# with our GitHub repository deploy key (set via -i), using
# -F /dev/null to make sure this ignores ~/.ssh/config and
# -p 22 to pin the standard SSH port.
#
# Use "$@" rather than $* so that any argument containing whitespace
# is forwarded to ssh as a single word instead of being re-split.
ssh -i "$HOME/.biopython_doc_deploy.key" -F /dev/null -p 22 "$@"
......@@ -31,6 +31,9 @@ dist
#Ignore all Jython class files (present if using Jython)
*.class
#Ignore any compiled C code
*.so
#Ignore the .tox directory from running tox locally
.tox/
......@@ -47,6 +50,9 @@ Tests/Graphics/*.eps
Tests/Graphics/*.svg
Tests/Graphics/*.png
# This file is downloaded when testing the Bio.SeqIO.GckIO module.
Tests/Gck/DGVC_GCK.zip
#Ignore the local BioSQL test settings:
Tests/biosql.ini
......@@ -62,6 +68,7 @@ Doc/examples/tree1.nwk
#Ignore LaTeX temp files, and compiled output
Doc/*.aux
Doc/*.gz
Doc/*.log
Doc/*.out
Doc/*.toc
......@@ -79,6 +86,7 @@ Doc/*/*/*.out
Doc/*/*/*.toc
Doc/*/*/*.haux
Doc/*/*/*.htoc
Doc/_minted-Tutorial/
Doc/Tutorial.txt
Doc/Tutorial.pdf
Doc/Tutorial.html
......@@ -94,9 +102,19 @@ Doc/hevea.sty
Doc/*/hevea.sty
Doc/*/*/hevea.sty
#Ignore artifacts of building the docs
_build
Doc/api/*.rst
#Ignore IntelliJ IDEA directory and project files
.idea
*.iml
#Ignore unittest cache dirctory
#Ignore unittest cache directory
.cache/
#Ignore vscode directory
.vscode
#Ignore mypy cache directory
.mypy_cache
\ No newline at end of file
# This is a configuration file for tox, used to test
# Biopython on various versions of Python etc under
# the Travis Continous Integration service which is
# the Travis Continuous Integration service which is
# configured in the file .travis.yml
#
# By default tox will look for tox.ini, so this file
......@@ -34,8 +34,8 @@ envlist =
sdist
bdist_wheel
api
{py27,py34,py35,py36,pypy,pypy3}-cover
{py27,py34,py35,py36,pypy,pypy3}-nocov
{py27,py35,py36,py37,py38,pypy,pypy3}-cover
{py27,py35,py36,py37,py38,pypy,pypy3}-nocov
[testenv]
# TODO: Try tox default sdist based install instead:
......@@ -56,21 +56,25 @@ whitelist_externals =
install_command = pip install --only-binary=scipy {opts} {packages}
deps =
#Lines starting xxx: are filtered by the environment.
#Leaving py34 without any soft dependencies (just numpy)
#Leaving py36 without any dependencies (even numpy)
cover: coverage
cover: codecov
py27: unittest2
py27: mysql-python
py27,py36: mmtf-python
py27,py37: mmtf-python
# https://bitbucket.org/rptlab/reportlab/issues/176/incompatibility-with-pillow-600
py27,py35: reportlab
py27,py34,py35,py36: psycopg2-binary
py27,py34,py35,py35: mysql-connector-python-rf
py27,py35: pillow==5.4
py27,py35,py37: psycopg2-binary
py27,py35: mysql-connector-python-rf
py35,py37: mysqlclient
py27,py35,pypy: rdflib
pypy,pypy3: numpy==1.12.1
py27,py34,py36: numpy
py36: scipy
pypy,pypy3: mysqlclient
py27,py35,py37: numpy
py37: scipy
py27: networkx
py36: matplotlib
py37: matplotlib
commands =
#The bash call is a work around for special characters
#The /dev/null is to hide the verbose output but leave warnings
......@@ -95,17 +99,23 @@ deps =
flake8-docstrings
flake8-blind-except
flake8-rst-docstrings
py34,py35,py36: flake8-bugbear
flake8-comprehensions
flake8-bugbear;python_version>="3.5"
flake8-quotes
restructuredtext_lint
doc8
pygments
# doc8 needs docutils, but docutils==0.15 has an import order bug
# https://bugs.launchpad.net/doc8/+bug/1837515
# https://sourceforge.net/p/docutils/bugs/366/
docutils==0.14
# flake8-docstrings uses a function removed in pydocstyle 4.0.0; once a fix
# is released in flake8-docstrings we can remove the following constraint:
pydocstyle<4.0.0
# See https://gitlab.com/pycqa/flake8-docstrings/issues/36
commands =
flake8 --max-line-length 82 setup.py
# These folders each have their own .flake8 file:
flake8 BioSQL/
flake8 Scripts/
flake8 Doc/examples/
flake8 Bio/
flake8 Tests/
# PEP-8 and PEP-257 style checks:
flake8
# Now do various checks on our RST files:
# Calling via bash to get it to expand the wildcard for us
bash -c \'rst-lint --level warning *.rst\'
......@@ -153,7 +163,8 @@ deps =
scipy
sphinx>=1.8.0
numpydoc
pygments
sphinx_rtd_theme
commands =
bash -c \'python setup.py install > /dev/null\'
bash -c \'mkdir -p Doc/api/_templates Doc/api/_static Doc/api/_build\'
make -C Doc/api/ html
......@@ -13,7 +13,10 @@
# - basics - quick things like style and packaging
# - test - the actual functional tests which are slow
dist: trusty
dist: xenial
services:
- mysql
- postgresql
language: python
cache: pip
matrix:
......@@ -21,42 +24,57 @@ matrix:
- stage: basics
python: 2.7
env: TOXENV=style
services:
addons:
apt:
packages:
before_install: echo "Going to run basic checks"
- stage: basics
python: 3.6
python: 3.7
env: TOXENV=style
services:
addons:
apt:
packages:
before_install: echo "Going to run basic checks"
- stage: basics
python: 3.7
env: TOXENV=sdist,bdist_wheel
services:
addons:
apt:
packages:
before_install: echo "Going to run basic checks"
- stage: test
python: 2.7
python: 3.7
env: TOXENV=api
services:
addons:
apt:
packages:
before_install: echo "Going to build API docs"
deploy:
provider: script
script: .github/deploy_docs.sh
skip_cleanup: true
on:
branch: master
- stage: test
python: 2.7
env: TOXENV=py27-cover
- stage: test
python: 3.4
env: TOXENV=py34-cover
- stage: test
python: 3.5
env: TOXENV=py35-cover
- stage: test
python: 3.6
env: TOXENV=py36-cover
- stage: test
python: 3.7
env: TOXENV=py37-cover
- stage: test
# TODO: Change this once a stable Python 3.8 is on TravisCI:
python: 3.8-dev
env: TOXENV=py38-cover
- stage: test
python: pypy
env: TOXENV=pypy-nocov
......@@ -111,33 +129,6 @@ before_install:
# Setup environment for t-coffee
- mkdir -p $HOME/tcoffee_temp
- export HOME_4_TCOFFEE=$HOME/tcoffee_temp
# There are TravisCI provided versions of PyPy and PyPy3, but currently too old.
# We therefore deactivate that, and download and unzip portable PyPy binaries.
- |
if [[ $TRAVIS_PYTHON_VERSION == 'pypy' ]]; then
deactivate
wget https://bitbucket.org/squeaky/portable-pypy/downloads/pypy-6.0.0-linux_x86_64-portable.tar.bz2
tar -jxvf pypy-6.0.0-linux_x86_64-portable.tar.bz2
echo 'Setting up aliases...'
cd pypy-6.0.0-linux_x86_64-portable/bin/
export PATH=$PWD:$PATH
ln -s pypy python
echo 'Setting up pip...'
./pypy -m ensurepip
fi
- |
if [[ $TRAVIS_PYTHON_VERSION == 'pypy3' ]]; then
deactivate
wget https://bitbucket.org/squeaky/portable-pypy/downloads/pypy3.5-6.0.0-linux_x86_64-portable.tar.bz2
tar -jxvf pypy3.5-6.0.0-linux_x86_64-portable.tar.bz2
echo 'Setting up aliases...'
cd pypy3.5-6.0.0-linux_x86_64-portable/bin/
export PATH=$PWD:$PATH
ln -s pypy3 python
echo 'Setting up pip...'
./pypy3 -m ensurepip
ln -s pip3 pip
fi
- popd
- cp Tests/biosql.ini.sample Tests/biosql.ini
- psql -c "create database biosql_test;" -U postgres
......@@ -152,6 +143,7 @@ install:
- tox -c .travis-tox.ini -e $TOXENV --notest
script:
- python --version
- travis_wait 30 tox -c .travis-tox.ini -e $TOXENV
notifications:
......
[flake8]
# Would like to enable this in future...
# doctests = True
ignore =
# =======================
# flake: E###, F###, W###
# =======================
# pycodestyle v2.4.0 default ignore is E121,E123,E126,E226,E24,E704,W503,W504
# flake8 v3.6.0 default ignore is E121,E123,E126,E226,E24,E704,W503,W504
# These are ignored by default:
E122,E123,E126,W503,W504,
# These are not ignored by default:
# E127 continuation line over-indented for visual indent
# E128 continuation line under-indented for visual indent
# E501 line too long (XX > 79 characters)
# E731 do not assign a lambda expression, use a def
# F401 module imported but unused
# F841 local variable name is assigned to but never used
# TODO: Fix some of these?
E501,E731,F401,F841,
# =====================================
# pydocstyle: D1## - Missing Docstrings
# =====================================
# D100 Missing docstring in public module
# D101 Missing docstring in public class
# D102 Missing docstring in public method
# D103 Missing docstring in public function
# D105 Missing docstring in magic method
# TODO: Fix some of these?
D100,D101,D102,D103,D105,
# ====================================
# pydocstyle: D2## - Whitespace Issues
# ====================================
# D203 1 blank line required before class docstring
# D211 No blank lines allowed before class docstring
# We ignore D203 deliberately in favour of passing D211,
D203,
# ====================
# flake8-bugbear: B###
# ====================
# B007 Loop control variable not used within the loop body.
# If this is intended, start the name with an underscore.
B007,
# ================================================
# flake8-commas: C#### (in case installed locally)
# ================================================
# C812 missing trailing comma
# C815 missing trailing comma in Python 3.5+
C812,C815
......@@ -29,7 +29,7 @@ class ParserError(ValueError):
_modeError = ParserError("You're trying to open an Affymetrix v4"
" CEL file. You have to use a read binary mode,"
" like this `open(filename \"rb\")`.")
" like this: open(filename, 'rb')")
# for debugging
# import pprint
......@@ -164,14 +164,14 @@ def read_v4(f):
# http://www.affymetrix.com/estore/support/developer/powertools/changelog/gcos-agcc/cel.html.affx
record = Record()
preHeaders = ["magic", "version", "columns", "rows", "cellNo", "headerLen"]
preHeadersMap = dict()
headersMap = dict()
preHeadersMap = {}
headersMap = {}
# load pre-headers
try:
for name in preHeaders:
preHeadersMap[name] = struct.unpack("<i", f.read(4))[0]
except UnicodeDecodeError as e:
except UnicodeDecodeError:
raise _modeError
char = f.read(preHeadersMap["headerLen"])
......@@ -232,7 +232,7 @@ def read_v4(f):
raiseBadHeader("OffsetY", 0)
# This is unfortunately undocumented, but it turns out that real data has
# the `record.AlgorithmParameters` repeated in the data section, until an
# the record.AlgorithmParameters repeated in the data section, until an
# EOF, i.e. b"\x04".
char = b"\x00"
safetyValve = 10**4
......
......@@ -71,7 +71,7 @@ class SummaryInfo(object):
"""
# Iddo Friedberg, 1-JUL-2004: changed ambiguous default to "X"
consensus = ''
consensus = ""
# find the length of the consensus we are creating
con_len = self.alignment.get_alignment_length()
......@@ -86,7 +86,7 @@ class SummaryInfo(object):
# make sure we haven't run past the end of any sequences
# if they are of different lengths
if n < len(record.seq):
if record.seq[n] != '-' and record.seq[n] != '.':
if record.seq[n] != "-" and record.seq[n] != ".":
if record.seq[n] not in atom_dict:
atom_dict[record.seq[n]] = 1
else:
......@@ -132,7 +132,7 @@ class SummaryInfo(object):
"""
# Iddo Friedberg, 1-JUL-2004: changed ambiguous default to "X"
consensus = ''
consensus = ""
# find the length of the consensus we are creating
con_len = self.alignment.get_alignment_length()
......@@ -270,8 +270,8 @@ class SummaryInfo(object):
rep_dict = self._pair_replacement(
self.alignment[rec_num1].seq,
self.alignment[rec_num2].seq,
self.alignment[rec_num1].annotations.get('weight', 1.0),
self.alignment[rec_num2].annotations.get('weight', 1.0),
self.alignment[rec_num1].annotations.get("weight", 1.0),
self.alignment[rec_num2].annotations.get("weight", 1.0),
rep_dict, skip_items)
return rep_dict
......@@ -363,7 +363,7 @@ class SummaryInfo(object):
if isinstance(self.alignment._alphabet, Alphabet.Gapped):
skip_items.append(self.alignment._alphabet.gap_char)
all_letters = all_letters.replace(
self.alignment._alphabet.gap_char, '')
self.alignment._alphabet.gap_char, "")
# now create the dictionary
for first_letter in all_letters:
......@@ -408,7 +408,7 @@ class SummaryInfo(object):
chars_to_ignore.append(self.alignment._alphabet.gap_char)
for char in chars_to_ignore:
all_letters = all_letters.replace(char, '')
all_letters = all_letters.replace(char, "")
if axis_seq:
left_seq = axis_seq
......@@ -429,7 +429,7 @@ class SummaryInfo(object):
this_residue = None
if this_residue and this_residue not in chars_to_ignore:
weight = record.annotations.get('weight', 1.0)
weight = record.annotations.get("weight", 1.0)
try:
score_dict[this_residue] += weight
# if we get a KeyError then we have an alphabet problem
......@@ -521,7 +521,7 @@ class SummaryInfo(object):
# determine all of the letters we have to deal with
all_letters = self._get_all_letters()
for char in chars_to_ignore:
all_letters = all_letters.replace(char, '')
all_letters = all_letters.replace(char, "")
info_content = {}
for residue_num in range(start, end):
......@@ -584,7 +584,7 @@ class SummaryInfo(object):
for record in all_records:
try:
if record.seq[residue_num] not in to_ignore:
weight = record.annotations.get('weight', 1.0)
weight = record.annotations.get("weight", 1.0)
freq_info[record.seq[residue_num]] += weight
total_count += weight
# getting a key error means we've got a problem with the alphabet
......@@ -676,6 +676,7 @@ class SummaryInfo(object):
return total_info
def get_column(self, col):
"""Return column of alignment."""
# TODO - Deprecate this and implement slicing?
return self.alignment[:, col]
......
......@@ -58,12 +58,12 @@ class DialignCommandline(AbstractCommandline):
"Anchored alignment. Requires a file <seq_file>.anc "
"containing anchor points."),
_Switch(["-cs", "cs"],
"If segments are translated, not only the `Watson "
"strand' but also the `Crick strand' is looked at."),
"If segments are translated, not only the 'Watson "
"strand' but also the 'Crick strand' is looked at."),
_Switch(["-cw", "cw"],
"Additional output file in CLUSTAL W format."),
_Switch(["-ds", "ds"],
"`dna alignment speed up' - non-translated nucleic acid "
"'dna alignment speed up' - non-translated nucleic acid "
"fragments are taken into account only if they start "
"with at least two matches. Speeds up DNA alignment at "
"the expense of sensitivity."),
......@@ -89,7 +89,7 @@ class DialignCommandline(AbstractCommandline):
"This option speeds up the alignment but may lead "
"to reduced alignment quality."),
_Switch(["-lgs", "lgs"],
"`long genomic sequences' - combines the following "
"'long genomic sequences' - combines the following "
"options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
"-fop, -ff, -cs, -ds, -pst "),
_Switch(["-lgs_t", "lgs_t"],
......@@ -99,7 +99,7 @@ class DialignCommandline(AbstractCommandline):
"-lgs but not very sensitive for non-coding regions."),
_Option(["-lmax", "lmax"],
"Maximum fragment length = x (default: x = 40 or "
"x = 120 for `translated' fragments). Shorter x "
"x = 120 for 'translated' fragments). Shorter x "
"speeds up the program but may affect alignment quality.",
checker_function=lambda x: isinstance(x, int),
equate=False),
......@@ -108,11 +108,11 @@ class DialignCommandline(AbstractCommandline):
"about fragments selected for pairwise alignment and "
"about consistency in multi-alignment procedure."),
_Switch(["-ma", "ma"],
"`mixed alignments' consisting of P-fragments and "
"'mixed alignments' consisting of P-fragments and "
"N-fragments if nucleic acid sequences are aligned."),
_Switch(["-mask", "mask"],
"Residues not belonging to selected fragments are "
"replaced by `*' characters in output alignment "
"replaced by '*' characters in output alignment "
"(rather than being printed in lower-case characters)"),
_Switch(["-mat", "mat"],
"Creates file *mat with substitution counts derived "
......@@ -135,10 +135,10 @@ class DialignCommandline(AbstractCommandline):
"No translation of fragments."),
_Switch(["-nt", "nt"],
"Input sequences are nucleic acid sequences and "
"`nucleic acid segments' are translated to `peptide "
"'nucleic acid segments' are translated to 'peptide "
"segments'."),
_Switch(["-nta", "nta"],
"`no textual alignment' - textual alignment suppressed. "
"'no textual alignment' - textual alignment suppressed. "
"This option makes sense if other output files are of "
"interest -- e.g. the fragment files created with -ff, "
"-fop, -fsm or -lo."),
......@@ -161,7 +161,7 @@ class DialignCommandline(AbstractCommandline):
"alignment or alignment of translated DNA fragments "
"at the expense of sensitivity."),
_Option(["-stars", "stars"],
"Maximum number of `*' characters indicating degree "
"Maximum number of '*' characters indicating degree "
"of local similarity among sequences. By default, no "
"stars are used but numbers between 0 and 9, instead.",
checker_function=lambda x: x in range(0, 10),
......
......@@ -281,7 +281,7 @@ class MafftCommandline(AbstractCommandline):
_Option(["--tm", "tm"],
"Transmembrane PAM number (Jones et al. 1994) "
"matrix is used. number>0. Default: BLOSUM62",
filename=True,
filename=True, # to ensure spaced inputs are quoted
equate=False),
# Use a user-defined AA scoring matrix. The format of matrixfile is
# the same to that of BLAST. Ignored when nucleotide sequences are
......@@ -289,7 +289,7 @@ class MafftCommandline(AbstractCommandline):
_Option(["--aamatrix", "aamatrix"],
"Use a user-defined AA scoring matrix. "
"Default: BLOSUM62",
filename=True,
filename=True, # to ensure spaced inputs are quoted
equate=False),
# Incorporate the AA/nuc composition information into the scoring
# matrix. Default: off
......
......@@ -52,7 +52,12 @@ class MuscleCommandline(AbstractCommandline):
["pctid_kimura", "pctid_log"]
OBJECTIVE_SCORES = ["sp", "ps", "dp", "xp", "spf", "spm"]
TREE_ROOT_METHODS = ["pseudo", "midlongestspan", "minavgleafdist"]
SEQUENCE_TYPES = ["protein", "nucleo", "auto"]
# The nucleotide arguments for the sequence type parameter in MUSCLE (-seqtype)
# were updated at some point in MUSCLE version 3.8. Prior to the update
# 'nucleo' was used for nucleotide. This has been updated to 'rna' and 'dna'.
# 'nucleo' is kept for backwards compatibility with older MUSCLE versions.
SEQUENCE_TYPES = ["protein", "rna", "dna", "nucleo", "auto"]
WEIGHTING_SCHEMES = ["none", "clustalw", "henikoff", "henikoffpb",
"gsc", "threeway"]
self.parameters = [
......@@ -304,8 +309,10 @@ class MuscleCommandline(AbstractCommandline):
filename=True,
equate=False),
# seqtype protein auto Sequence type.
# nucleo
# dna (MUSCLE version > 3.8)
# rna (MUSCLE version > 3.8)
# auto
# nucleo (only valid for MUSCLE versions < 3.8)
_Option(["-seqtype", "seqtype"],
"Sequence type",
checker_function=lambda x: x in SEQUENCE_TYPES,
......
......@@ -57,10 +57,7 @@ class TCoffeeCommandline(AbstractCommandline):
One (or more separated by a comma) of:
'clustalw_aln', 'clustalw', 'gcg', 'msf_aln',
'pir_aln', 'fasta_aln', 'phylip', 'pir_seq', 'fasta_seq'
Note that of these Biopython's AlignIO module will only
read clustalw, pir, and fasta.
""", # TODO - Can we read the PHYLIP output?
""",
equate=False),
_Option(["-infile", "infile"],
"Specify the input file.",
......
......@@ -15,18 +15,17 @@ class, used in the Bio.AlignIO module.
from __future__ import print_function
import sys # Only needed to check if we are using Python 2 or 3
from Bio._py3k import raise_from
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord, _RestrictedDict
from Bio import Alphabet
try:
from Bio.Align import _aligners
except ImportError as e:
new_exc = ImportError("{}: you should not import directly from the "
"biopython source directory; please exit the source "
"tree and re-launch your code from there".format(e))
new_exc.__cause__ = None
raise new_exc
# Import errors may occur here if a compiled aligners.c file
# (_aligners.pyd or _aligners.so) is missing or if the user is
# importing from within the Biopython source tree, see PR #2007:
# https://github.com/biopython/biopython/pull/2007
class MultipleSeqAlignment(object):
......@@ -460,12 +459,12 @@ class MultipleSeqAlignment(object):
# in BioPerl, but I'm not positive what the best thing to do on
# this is...
if start:
new_record.annotations['start'] = start
new_record.annotations["start"] = start
if end:
new_record.annotations['end'] = end
new_record.annotations["end"] = end
# another hack to add weight information to the sequence
new_record.annotations['weight'] = weight
new_record.annotations["weight"] = weight
self._records.append(new_record)
......@@ -681,11 +680,11 @@ class MultipleSeqAlignment(object):
alpha = Alphabet._consensus_alphabet([self._alphabet, other._alphabet])
merged = (left + right for left, right in zip(self, other))
# Take any common annotation:
annotations = dict()
annotations = {}
for k, v in self.annotations.items():
if k in other.annotations and other.annotations[k] == v:
annotations[k] = v
column_annotations = dict()
column_annotations = {}
for k, v in self.column_annotations.items():
if k in other.column_annotations:
column_annotations[k] = v + other.column_annotations[k]
......@@ -970,11 +969,18 @@ class PairwiseAlignment(object):
return self.path >= other.path
def __format__(self, format_spec):
if format_spec == 'psl':
if format_spec == "psl":
return self._format_psl()
return str(self)
def __str__(self):
    """Return a human-readable representation of the alignment."""
    # Plain-string sequences use the compact three-line formatter;
    # anything else (e.g. sequences of arbitrary objects) goes through
    # the generalized formatter.
    plain = isinstance(self.query, str) and isinstance(self.target, str)
    return self.format() if plain else self._format_generalized()
def format(self):
"""Create a human-readable representation of the alignment."""
query = self.query
target = self.target
try:
......@@ -1000,23 +1006,23 @@ class PairwiseAlignment(object):
end1, end2 = path[0]
if end1 > 0 or end2 > 0:
end = max(end1, end2)
aligned_seq1 += "." * (end - end1) + seq1[:end1]
aligned_seq2 += "." * (end - end2) + seq2[:end2]
pattern += '.' * end
aligned_seq1 += " " * (end - end1) + seq1[:end1]
aligned_seq2 += " " * (end - end2) + seq2[:end2]
pattern += " " * end
start1 = end1
start2 = end2
for end1, end2 in path[1:]:
gap = 0
if end1 == start1:
gap = end2 - start2
aligned_seq1 += '-' * gap
aligned_seq1 += "-" * gap
aligned_seq2 += seq2[start2:end2]
pattern += '-' * gap
pattern += "-" * gap
elif end2 == start2:
gap = end1 - start1
aligned_seq1 += seq1[start1:end1]
aligned_seq2 += '-' * gap
pattern += '-' * gap
aligned_seq2 += "-" * gap
pattern += "-" * gap
else:
s1 = seq1[start1:end1]
s2 = seq2[start2:end2]
......@@ -1024,17 +1030,90 @@ class PairwiseAlignment(object):
aligned_seq2 += s2
for c1, c2 in zip(s1, s2):
if c1 == c2:
pattern += '|'
pattern += "|"
else:
pattern += 'X'
pattern += "."
start1 = end1
start2 = end2
n1 -= end1
n2 -= end2
n = max(n1, n2)
aligned_seq1 += seq1[end1:] + '.' * (n - n1)
aligned_seq2 += seq2[end2:] + '.' * (n - n2)
pattern += '.' * n
aligned_seq1 += seq1[end1:] + " " * (n - n1)
aligned_seq2 += seq2[end2:] + " " * (n - n2)
pattern += " " * n
return "%s\n%s\n%s\n" % (aligned_seq1, pattern, aligned_seq2)
def _format_generalized(self):
    """Return a printable three-line representation of the alignment.

    Used when target and/or query are not plain strings (e.g. lists of
    residue names): each element is rendered with str() and columns are
    joined with single spaces so multi-character elements stay aligned.

    NOTE(review): n1 and n2 are computed but never used, and - unlike
    format() - any unaligned trailing region is not rendered; confirm
    whether that is intentional.
    """
    seq1 = self.target
    seq2 = self.query
    n1 = len(seq1)
    n2 = len(seq2)
    aligned_seq1 = []
    aligned_seq2 = []
    pattern = []
    path = self.path
    # Leading unaligned region: render the overhang of the longer
    # sequence against blank padding on the other two lines.
    end1, end2 = path[0]
    if end1 > 0 or end2 > 0:
        if end1 <= end2:
            for c2 in seq2[:end2 - end1]:
                s2 = str(c2)
                s1 = " " * len(s2)
                aligned_seq1.append(s1)
                aligned_seq2.append(s2)
                pattern.append(s1)
        else:  # end1 > end2
            for c1 in seq1[:end1 - end2]:
                s1 = str(c1)
                s2 = " " * len(s1)
                aligned_seq1.append(s1)
                aligned_seq2.append(s2)
                pattern.append(s2)
    start1 = end1
    start2 = end2
    # Walk the alignment path: each step is either a gap in one of the
    # two sequences, or an aligned (possibly mismatching) chunk.
    for end1, end2 in path[1:]:
        if end1 == start1:
            # Gap in the target: dashes on line 1 and in the pattern.
            for c2 in seq2[start2:end2]:
                s2 = str(c2)
                s1 = "-" * len(s2)
                aligned_seq1.append(s1)
                aligned_seq2.append(s2)
                pattern.append(s1)
            start2 = end2
        elif end2 == start2:
            # Gap in the query: dashes on line 2 and in the pattern.
            for c1 in seq1[start1:end1]:
                s1 = str(c1)
                s2 = "-" * len(s1)
                aligned_seq1.append(s1)
                aligned_seq2.append(s2)
                pattern.append(s2)
            start1 = end1
        else:
            # Aligned chunk: '|' marks a match, '.' a mismatch; pad the
            # shorter rendering so columns keep the same width.
            for c1, c2 in zip(seq1[start1:end1], seq2[start2:end2]):
                s1 = str(c1)
                s2 = str(c2)
                m1 = len(s1)
                m2 = len(s2)
                if c1 == c2:
                    p = "|"
                else:
                    p = "."
                if m1 < m2:
                    space = (m2 - m1) * " "
                    s1 += space
                    pattern.append(p * m1 + space)
                elif m1 > m2:
                    space = (m1 - m2) * " "
                    s2 += space
                    pattern.append(p * m2 + space)
                else:
                    pattern.append(p * m1)
                aligned_seq1.append(s1)
                aligned_seq2.append(s2)
            start1 = end1
            start2 = end2
    aligned_seq1 = " ".join(aligned_seq1)
    aligned_seq2 = " ".join(aligned_seq2)
    pattern = " ".join(pattern)
    return "%s\n%s\n%s\n" % (aligned_seq1, pattern, aligned_seq2)
def _format_psl(self):
......@@ -1073,7 +1152,7 @@ class PairwiseAlignment(object):
blockSizes = []
qStarts = []
tStarts = []
strand = '+'
strand = "+"
start1 = 0
start2 = 0
start1, start2 = self.path[0]
......@@ -1104,7 +1183,7 @@ class PairwiseAlignment(object):
qStarts.append(start2)
blockSizes.append(count1)
for c1, c2 in zip(seq1[start1:end1], seq2[start2:end2]):
if c1 == 'N' or c2 == 'N':
if c1 == "N" or c2 == "N":
Ns += 1
elif c1 == c2:
match += 1
......@@ -1141,6 +1220,96 @@ class PairwiseAlignment(object):
line = "\t".join(words) + "\n"
return line
@property
def aligned(self):
    """Return the indices of subsequences aligned to each other.

    This property returns the start and end indices of subsequences
    in the target and query sequence that were aligned to each other.
    If the alignment between target (t) and query (q) consists of N
    chunks, you get two tuples of length N:

    (((t_start1, t_end1), (t_start2, t_end2), ..., (t_startN, t_endN)),
     ((q_start1, q_end1), (q_start2, q_end2), ..., (q_startN, q_endN)))

    For example,

    >>> from Bio import Align
    >>> aligner = Align.PairwiseAligner()
    >>> alignments = aligner.align("GAACT", "GAT")
    >>> alignment = alignments[0]
    >>> print(alignment)
    GAACT
    ||--|
    GA--T
    <BLANKLINE>
    >>> alignment.aligned
    (((0, 2), (4, 5)), ((0, 2), (2, 3)))
    >>> alignment = alignments[1]
    >>> print(alignment)
    GAACT
    |-|-|
    G-A-T
    <BLANKLINE>
    >>> alignment.aligned
    (((0, 1), (2, 3), (4, 5)), ((0, 1), (1, 2), (2, 3)))

    Note that different alignments may have the same subsequences
    aligned to each other. In particular, this may occur if alignments
    differ from each other in terms of their gap placement only:

    >>> aligner.mismatch_score = -10
    >>> alignments = aligner.align("AAACAAA", "AAAGAAA")
    >>> len(alignments)
    2
    >>> print(alignments[0])
    AAAC-AAA
    |||--|||
    AAA-GAAA
    <BLANKLINE>
    >>> alignments[0].aligned
    (((0, 3), (4, 7)), ((0, 3), (4, 7)))
    >>> print(alignments[1])
    AAA-CAAA
    |||--|||
    AAAG-AAA
    <BLANKLINE>
    >>> alignments[1].aligned
    (((0, 3), (4, 7)), ((0, 3), (4, 7)))

    The property can be used to identify alignments that are identical
    to each other in terms of their aligned sequences.
    """
    # self.path is a sequence of (target_index, query_index) nodes.
    # A step that advances in BOTH sequences is an aligned (ungapped)
    # chunk; a step that advances in only one of them is a gap and is
    # skipped.  The Python-2/3 split below only normalizes the node
    # values; the merging loop itself is shared (previously it was
    # duplicated verbatim in both branches).
    if sys.version_info[0] > 2:
        path = self.path
    else:
        # Python 2: convert all long ints to ints to be consistent
        # with the doctests.
        path = [(int(i1), int(i2)) for i1, i2 in self.path]
    segments1 = []
    segments2 = []
    i1, i2 = path[0]
    for j1, j2 in path[1:]:
        if j1 > i1 and j2 > i2:
            segments1.append((i1, j1))
            segments2.append((i2, j2))
        i1, i2 = j1, j2
    return tuple(segments1), tuple(segments2)
class PairwiseAlignments(object):
"""Implements an iterator over pairwise alignments returned by the aligner.
......@@ -1187,7 +1356,7 @@ class PairwiseAlignments(object):
try:
alignment = next(self)
except StopIteration:
raise IndexError('index out of range')
raise_from(IndexError("index out of range"), None)
return alignment
def __iter__(self):
......@@ -1233,60 +1402,60 @@ class PairwiseAligner(_aligners.PairwiseAligner):
>>> from Bio import Align
>>> aligner = Align.PairwiseAligner()
>>> alignments = aligner.align("ACCGT", "ACG")
>>> alignments = aligner.align("TACCG", "ACG")
>>> for alignment in sorted(alignments):
... print("Score = %.1f:" % alignment.score)
... print(alignment)
...
Score = 3.0:
ACCGT
|-||-
A-CG-
TACCG
-|-||
-A-CG
<BLANKLINE>
Score = 3.0:
ACCGT
||-|-
AC-G-
TACCG
-||-|
-AC-G
<BLANKLINE>
Specify the aligner mode as local to generate local alignments:
>>> aligner.mode = 'local'
>>> alignments = aligner.align("ACCGT", "ACG")
>>> alignments = aligner.align("TACCG", "ACG")
>>> for alignment in sorted(alignments):
... print("Score = %.1f:" % alignment.score)
... print(alignment)
...
Score = 3.0:
ACCGT
|-||.
A-CG.
TACCG
|-||
A-CG
<BLANKLINE>
Score = 3.0:
ACCGT
||-|.
AC-G.
TACCG
||-|
AC-G
<BLANKLINE>
Do a global alignment. Identical characters are given 2 points,
1 point is deducted for each non-identical character.
>>> aligner.mode = 'global'
>>> aligner.match = 2
>>> aligner.mismatch = -1
>>> for alignment in aligner.align("ACCGT", "ACG"):
>>> aligner.match_score = 2
>>> aligner.mismatch_score = -1
>>> for alignment in aligner.align("TACCG", "ACG"):
... print("Score = %.1f:" % alignment.score)
... print(alignment)
...
Score = 6.0:
ACCGT
||-|-
AC-G-
TACCG
-||-|
-AC-G
<BLANKLINE>
Score = 6.0:
ACCGT
|-||-
A-CG-
TACCG
-|-||
-A-CG
<BLANKLINE>
Same as above, except now 0.5 points are deducted when opening a
......@@ -1296,27 +1465,27 @@ class PairwiseAligner(_aligners.PairwiseAligner):
>>> aligner.extend_gap_score = -0.1
>>> aligner.target_end_gap_score = 0.0
>>> aligner.query_end_gap_score = 0.0
>>> for alignment in aligner.align("ACCGT", "ACG"):
>>> for alignment in aligner.align("TACCG", "ACG"):
... print("Score = %.1f:" % alignment.score)
... print(alignment)
...
Score = 5.5:
ACCGT
|-||-
A-CG-
TACCG
-|-||
-A-CG
<BLANKLINE>
Score = 5.5:
ACCGT
||-|-
AC-G-
TACCG
-||-|
-AC-G
<BLANKLINE>
The alignment function can also use known matrices already included in
Biopython:
>>> from Bio.SubsMat import MatrixInfo
>>> from Bio.Align import substitution_matrices
>>> aligner = Align.PairwiseAligner()
>>> aligner.substitution_matrix = MatrixInfo.blosum62
>>> aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
>>> alignments = aligner.align("KEVLA", "EVL")
>>> alignments = list(alignments)
>>> print("Number of alignments: %d" % len(alignments))
......@@ -1332,15 +1501,28 @@ class PairwiseAligner(_aligners.PairwiseAligner):
"""
def __setattr__(self, key, value):
    """Set an existing aligner attribute; refuse to create new ones.

    Restricting attribute creation catches typos such as
    ``aligner.mach_score = 2`` early instead of silently ignoring them.
    """
    allowed = dir(_aligners.PairwiseAligner)
    if key in allowed:
        _aligners.PairwiseAligner.__setattr__(self, key, value)
    else:
        raise AttributeError(
            "'PairwiseAligner' object has no attribute '%s'" % key
        )
def align(self, seqA, seqB):
    """Return the alignments of two sequences using PairwiseAligner."""
    # The C extension works on plain strings, so unwrap Seq objects.
    strA = str(seqA) if isinstance(seqA, Seq) else seqA
    strB = str(seqB) if isinstance(seqB, Seq) else seqB
    score, paths = _aligners.PairwiseAligner.align(self, strA, strB)
    return PairwiseAlignments(strA, strB, score, paths)
def score(self, seqA, seqB):
    """Return the alignment score of two sequences using PairwiseAligner."""
    # The C extension works on plain strings, so unwrap Seq objects.
    strA = str(seqA) if isinstance(seqA, Seq) else seqA
    strB = str(seqB) if isinstance(seqB, Seq) else seqB
    return _aligners.PairwiseAligner.score(self, strA, strB)
......
This diff is collapsed.
"""Substitution matrices."""
import os
import string
import numpy
from Bio import File
from Bio import BiopythonExperimentalWarning
from Bio._py3k import raise_from
# These two can be removed once we drop python2:
import sys
import platform
import warnings
warnings.warn("Bio.Align.substitution_matrices is an experimental module "
"which may still undergo significant changes. In particular, "
"the location of this module may change, and the Array class "
"defined in this module may be moved to other existing or new "
"modules in Biopython.",
BiopythonExperimentalWarning)
class Array(numpy.ndarray):
"""numpy array subclass indexed by integers and by letters."""
def __new__(cls, alphabet=None, dims=None, data=None, dtype=float):
"""Create a new Array instance."""
if isinstance(data, dict):
if alphabet is not None:
raise ValueError("alphabet should be None if data is a dict")
if dims is not None:
raise ValueError("dims should be None if data is a dict")
alphabet = []
for key in data:
if isinstance(key, str):
if dims is None:
dims = 1
elif dims != 1:
raise ValueError("inconsistent dimensions in data")
alphabet.append(key)
elif isinstance(key, tuple):
single_letters = True
if dims is None:
dims = len(key)
elif dims != len(key):
raise ValueError("inconsistent dimensions in data")
if dims == 1:
if not isinstance(key, str):
raise ValueError("expected string")
if len(key) > 1:
single_letters = False
alphabet.append(key)
elif dims == 2:
for letter in key:
if not isinstance(letter, str):
raise ValueError("expected string")
if len(letter) > 1:
single_letters = False
alphabet.append(letter)
else:
raise ValueError("data array should be 1- or 2- "
"dimensional (found %d dimensions) "
"in key" % dims)
alphabet = sorted(set(alphabet))
if single_letters:
alphabet = "".join(alphabet)
else:
alphabet = tuple(alphabet)
n = len(alphabet)
if dims == 1:
shape = (n, )
elif dims == 2:
shape = (n, n)
else: # dims is None
raise ValueError("data is an empty dictionary")
obj = super(Array, cls).__new__(cls, shape, dtype)
if dims == 1:
for i, key in enumerate(alphabet):
obj[i] = data.get(letter, 0.0)
elif dims == 2:
for i1, letter1 in enumerate(alphabet):
for i2, letter2 in enumerate(alphabet):
key = (letter1, letter2)
value = data.get(key, 0.0)
obj[i1, i2] = value
obj._alphabet = alphabet
return obj
if alphabet is None:
alphabet = string.ascii_uppercase
elif not (isinstance(alphabet, str) or isinstance(alphabet, tuple)):
raise ValueError("alphabet should be a string or a tuple")
n = len(alphabet)
if data is None:
if dims is None:
dims = 1
elif dims not in (1, 2):
raise ValueError("dims should be 1 or 2 (found %d)" % dims)
shape = (n, ) * dims
else:
if dims is None:
shape = data.shape
dims = len(shape)
if dims == 1:
pass
elif dims == 2:
if shape[0] != shape[1]:
raise ValueError("data array is not square")
else:
raise ValueError("data array should be 1- or 2- "
"dimensional (found %d dimensions) "
% dims)
else:
shape = (n, ) * dims
if data.shape != shape:
raise ValueError("data shape has inconsistent shape "
"(expected (%s), found (%s))"
% (shape, data.shape))
obj = super(Array, cls).__new__(cls, shape, dtype)
if data is None:
obj[:] = 0.0
else:
obj[:] = data
obj._alphabet = alphabet
return obj
def __array_finalize__(self, obj):
if obj is None:
return
self._alphabet = getattr(obj, "_alphabet", None)
def _convert_key(self, key):
if isinstance(key, tuple):
indices = []
for index in key:
if isinstance(index, str):
try:
index = self._alphabet.index(index)
except ValueError:
raise_from(IndexError("'%s'" % index), None)
indices.append(index)
key = tuple(indices)
elif isinstance(key, str):
try:
key = self._alphabet.index(key)
except ValueError:
raise_from(IndexError("'%s'" % key), None)
return key
def __getitem__(self, key):
    """Return self[key], supporting letter-based indexing.

    String keys (or tuples of strings) are converted to integer
    indices via the alphabet before delegating to numpy.
    """
    key = self._convert_key(key)
    value = numpy.ndarray.__getitem__(self, key)
    if value.ndim == 2:
        if self.ndim == 2:
            # A 2-D selection from a 2-D array must keep the full
            # shape; anything smaller would no longer match the
            # alphabet.
            if value.shape != self.shape:
                raise IndexError("Requesting truncated array")
        elif self.ndim == 1:
            # A 1-D array may only yield a 2-D result as a full-length
            # row or column vector.
            length = self.shape[0]
            if value.shape[0] == length and value.shape[1] == 1:
                pass
            elif value.shape[0] == 1 and value.shape[1] == length:
                pass
            else:
                raise IndexError("Requesting truncated array")
    elif value.ndim == 1:
        if value.shape[0] != self.shape[0]:
            # NOTE(review): this branch looks suspicious -- a truncated
            # 1-D selection is given a sliced alphabet rather than
            # raising IndexError like the 2-D branches do; confirm
            # against the upstream implementation.
            value._alphabet = self.alphabet[key]
    return value.view(Array)
def __setitem__(self, key, value):
key = self._convert_key(key)
numpy.ndarray.__setitem__(self, key, value)
def __contains__(self, key):
# Follow dict definition of __contains__
return key in self.keys()
def __array_prepare__(self, out_arr, context=None):
    """Check alphabet consistency before a ufunc runs.

    NOTE(review): the default context=None would fail the tuple
    unpacking below -- presumably numpy always supplies a context when
    invoking this hook; confirm.
    """
    # needed for numpy older than 1.13.0
    ufunc, inputs, i = context
    alphabet = self.alphabet
    # Refuse to mix Arrays that are indexed by different alphabets.
    for arg in inputs:
        if isinstance(arg, Array):
            if arg.alphabet != alphabet:
                raise ValueError("alphabets are inconsistent")
    return numpy.ndarray.__array_prepare__(self, out_arr, context)
def __array_wrap__(self, out_arr, context=None):
    """Unwrap single-element ufunc results to a scalar-like value.

    NOTE(review): len() raises TypeError on 0-d arrays -- presumably
    out_arr always has at least one dimension here; confirm.
    """
    if len(out_arr) == 1:
        return out_arr[0]
    return numpy.ndarray.__array_wrap__(self, out_arr, context)
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
    """Apply a numpy ufunc, keeping alphabets consistent.

    All Array operands (inputs and explicit outputs) must share this
    array's alphabet.  They are unwrapped to plain ndarrays before the
    ufunc runs; non-scalar results are re-wrapped as Arrays carrying
    this array's alphabet.
    """
    args = []
    alphabet = self._alphabet
    # Unwrap Array inputs, verifying their alphabets first.
    for arg in inputs:
        if isinstance(arg, Array):
            if arg.alphabet != alphabet:
                raise ValueError("alphabets are inconsistent")
            args.append(arg.view(numpy.ndarray))
        else:
            args.append(arg)
    outputs = kwargs.pop("out", None)
    if outputs:
        # Same unwrapping/validation for explicit output arrays.
        out_args = []
        for arg in outputs:
            if isinstance(arg, Array):
                if arg.alphabet != alphabet:
                    raise ValueError("alphabets are inconsistent")
                out_args.append(arg.view(numpy.ndarray))
            else:
                out_args.append(arg)
        kwargs["out"] = tuple(out_args)
    else:
        outputs = (None,) * ufunc.nout
    raw_results = super(Array, self).__array_ufunc__(ufunc, method,
                                                     *args, **kwargs)
    if raw_results is NotImplemented:
        return NotImplemented
    if method == "at":
        # In-place "at" operations return nothing.
        return
    if ufunc.nout == 1:
        raw_results = (raw_results,)
    results = []
    for raw_result, output in zip(raw_results, outputs):
        if raw_result.ndim == 0:
            # Scalar result: return it unwrapped.
            result = raw_result
        elif output is None:
            result = numpy.asarray(raw_result).view(Array)
            result._alphabet = self._alphabet
        else:
            result = output
            result._alphabet = self._alphabet
        results.append(result)
    return results[0] if len(results) == 1 else results
def transpose(self, axes=None):
"""Transpose the array."""
other = numpy.ndarray.transpose(self, axes)
other._alphabet = self._alphabet
return other
@property
def alphabet(self):
"""Return the alphabet property."""
return self._alphabet
def copy(self):
"""Create and return a copy of the array."""
other = Array(alphabet=self._alphabet, data=self)
return other
def get(self, key, value=None):
"""Return the value of the key if found; return value otherwise."""
try:
return self[key]
except IndexError:
return value
def items(self):
"""Return an iterator of (key, value) pairs in the array."""
dims = len(self.shape)
if dims == 1:
for index, key in enumerate(self._alphabet):
value = numpy.ndarray.__getitem__(self, index)
yield key, value
elif dims == 2:
for i1, c1 in enumerate(self._alphabet):
for i2, c2 in enumerate(self._alphabet):
key = (c1, c2)
value = numpy.ndarray.__getitem__(self, (i1, i2))
yield key, value
else:
raise RuntimeError("array has unexpected shape %s" % self.shape)
def keys(self):
"""Return a tuple with the keys associated with the array."""
dims = len(self.shape)
alphabet = self._alphabet
if dims == 1:
return tuple(alphabet)
elif dims == 2:
return tuple((c1, c2) for c2 in alphabet for c1 in alphabet)
else:
raise RuntimeError("array has unexpected shape %s" % self.shape)
def values(self):
"""Return a tuple with the values stored in the array."""
dims = len(self.shape)
alphabet = self._alphabet
if dims == 1:
return tuple(self)
elif dims == 2:
n1, n2 = self.shape
return tuple(numpy.ndarray.__getitem__(self, (i1, i2)) for i2 in range(n2) for i1 in range(n1))
else:
raise RuntimeError("array has unexpected shape %s" % self.shape)
def update(self, E=None, **F):
"""Update the array from dict/iterable E and F."""
if E is not None:
try:
alphabet = E.keys()
except AttributeError:
for key, value in E:
self[key] = value
else:
for key in E:
self[key] = E[key]
for key in F:
self[key] = F[key]
def _format_1D(self, fmt):
    """Render a 1-D array as one 'letter value' line per key.

    fmt is the %-style format applied to each value.  Header lines
    (if self.header was set, e.g. by read()) are emitted first as
    '# ...' lines.
    """
    _alphabet = self._alphabet
    n = len(_alphabet)
    words = [None for i in range(n)]
    lines = []
    try:
        header = self.header
    except AttributeError:
        pass
    else:
        for line in header:
            line = "# %s\n" % line
            lines.append(line)
    maxwidth = 0
    # First pass: format every value to determine the column width.
    for i, key in enumerate(_alphabet):
        value = self[key]
        word = fmt % value
        width = len(word)
        if width > maxwidth:
            maxwidth = width
        words[i] = word
    fmt2 = " %" + str(maxwidth) + "s"
    # Second pass: emit the right-aligned values.
    for letter, word in zip(_alphabet, words):
        word = fmt2 % word
        line = letter + word + "\n"
        lines.append(line)
    text = "".join(lines)
    return text
def _format_2D(self, fmt):
    """Render a 2-D array as a table with letter row/column labels.

    fmt is the %-style format applied to each value.  Header lines
    (if self.header was set, e.g. by read()) are emitted first as
    '# ...' lines.  Each column is sized independently and values are
    right-aligned.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-mangled source; the header row is assumed to be
    finalized once, after all columns have been formatted -- confirm
    against the upstream implementation.
    """
    alphabet = self.alphabet
    n = len(alphabet)
    words = [[None for j in range(n)] for i in range(n)]
    lines = []
    try:
        header = self.header
    except AttributeError:
        pass
    else:
        for line in header:
            line = "# %s\n" % line
            lines.append(line)
    width = max(len(c) for c in alphabet)
    line = " " * width
    # Format one column at a time so each column gets its own width.
    for j, c2 in enumerate(alphabet):
        maxwidth = 0
        for i, c1 in enumerate(alphabet):
            key = (c1, c2)
            value = self[key]
            word = fmt % value
            width = len(word)
            if width > maxwidth:
                maxwidth = width
            words[i][j] = word
        fmt2 = " %" + str(maxwidth) + "s"
        word = fmt2 % c2
        line += word
        for i, c1 in enumerate(alphabet):
            word = words[i][j]
            words[i][j] = fmt2 % word
    line = line.rstrip() + "\n"
    lines.append(line)
    for letter, row in zip(alphabet, words):
        line = letter + "".join(row) + "\n"
        lines.append(line)
    text = "".join(lines)
    return text
def __format__(self, fmt):
    """Render the array as text; fmt is a %-style per-value format.

    An empty fmt defaults to "%i" for integer dtypes and "%.1f"
    otherwise.
    """
    if not fmt:
        fmt = "%i" if numpy.issubdtype(self.dtype, numpy.integer) else "%.1f"
    rank = len(self.shape)
    if rank == 1:
        return self._format_1D(fmt)
    if rank == 2:
        return self._format_2D(fmt)
    raise RuntimeError("Array has unexpected rank %d" % rank)
def __str__(self):
    """Return the default text rendering of the array."""
    return format(self, "")
def __repr__(self):
text = numpy.ndarray.__repr__(self)
alphabet = self._alphabet
if isinstance(alphabet, str):
assert text.endswith(")")
text = text[:-1] + ",\n alphabet='%s')" % self._alphabet
return text
if sys.version_info[0] < 3 and platform.python_implementation() == "PyPy":
# For python2 on PyPy, subclassing from a numpy array, which supports the
# buffer protocol, loses the Py_TPFLAGS_HAVE_NEWBUFFER flag on tp_flags on
# the class type, although the subclass still supports the buffer protocol.
# Adding this flag by hand here, as a temporary hack until we drop python2.
from .. import _aligners
_aligners.add_buffer_protocol_flag(Array)
def read(handle, dtype=float):
    """Parse a substitution-matrix file and return an Array object.

    Leading '#' lines are collected (without the '#') into the
    returned matrix's .header attribute.  A file whose first two data
    rows have two columns each is read as a 1-D array of key/value
    lines; otherwise the first data row is taken to be the alphabet of
    a 2-D matrix whose following rows each start with their row letter.

    NOTE(review): a file consisting only of comment lines, or with a
    single data row, would fail here (stale 'line' / IndexError on
    rows[1]); confirm that inputs are always well-formed.
    """
    header = []
    with File.as_handle(handle) as fp:
        for line in fp:
            if not line.startswith("#"):
                break
            header.append(line[1:].strip())
        # 'line' is now the first non-comment line; the rest of the
        # handle holds the remaining data rows.
        row = line.split()
        rows = [row]
        for line in fp:
            row = line.split()
            rows.append(row)
    if len(rows[0]) == len(rows[1]) == 2:
        # 1-D format: each row is "<key> <value>".
        alphabet = [key for key, value in rows]
        for key in alphabet:
            if len(key) > 1:
                # Any multi-letter key forces a tuple alphabet.
                alphabet = tuple(alphabet)
                break
        else:
            alphabet = "".join(alphabet)
        matrix = Array(alphabet=alphabet, dims=1, dtype=dtype)
        matrix.update(rows)
    else:
        # 2-D format: the first row is the alphabet; each following
        # row starts with its row letter.
        alphabet = rows.pop(0)
        for key in alphabet:
            if len(key) > 1:
                alphabet = tuple(alphabet)
                break
        else:
            alphabet = "".join(alphabet)
        matrix = Array(alphabet=alphabet, dims=2, dtype=dtype)
        for letter1, row in zip(alphabet, rows):
            assert letter1 == row.pop(0)
            for letter2, word in zip(alphabet, row):
                matrix[letter1, letter2] = float(word)
    matrix.header = header
    return matrix
def load(name=None):
    """Load and return a precalculated substitution matrix.

    With no argument, return the sorted list of available matrix names
    instead:

    >>> from Bio.Align import substitution_matrices
    >>> names = substitution_matrices.load()
    """
    # Matrix files ship in the "data" subdirectory next to this module.
    data_directory = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "data"
    )
    if name is None:
        return sorted(os.listdir(data_directory))
    return read(os.path.join(data_directory, name))