Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -7,18 +7,27 @@ environment:
  matrix:
    - PY_MAJOR_VER: 2
      PYTHON_ARCH: "x86"
-    - PY_MAJOR_VER: 3
+    - PY_MAJOR_VER: 2
      PYTHON_ARCH: "x86_64"
    - PY_MAJOR_VER: 3
      PYTHON_ARCH: "x86"
+    - PY_MAJOR_VER: 3
+      PYTHON_ARCH: "x86_64"

 matrix:
  fast_finish: true

 # Enable MySQL and PostgreSQL for BioSQL tests
 services:
-  - mysql
  - postgresql
+  - mysql
+
+before_build:
+  - SET PGUSER=postgres
+  - SET PGPASSWORD=Password12!
+  - SET PATH=C:\Program Files\PostgreSQL\9.6\bin\;%PATH%
+  - psql -U postgres -c "CREATE USER biosql_user WITH PASSWORD 'biosql_pass';"
+  - psql -U postgres -c "CREATE DATABASE biosql_test OWNER biosql_user;"

 build_script:
 # If there's a newer build queued for the same PR, cancel this one
@@ -33,7 +42,7 @@ build_script:
  - conda update conda
  - conda install setuptools numpy mysql-connector-python psycopg2 matplotlib networkx reportlab scipy coverage
  - if "PY_MAJOR_VER"=="2" conda install unittest2
-  - pip install . -vvv
+  - python setup.py build

 test_script:
  - python -c "import sys; print(sys.version)"

--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
+# See https://help.github.com/articles/about-codeowners/
+# and https://github.com/blog/2392-introducing-code-owners
+#
+# Lines starting with '#' are comments.
+# Each line is a file pattern followed by one or more owners.
+#
+# These owners will be the default owners for everything in the repo.
+# *       @defunkt
+#
+# Order is important. The last matching pattern has the most precedence.
+# So if a pull request only touches javascript files, only these owners
+# will be requested to review.
+# *.js    @octocat @github/js
+#
+# You can also use email addresses if you prefer.
+# docs/*  docs@example.com
+#
+
+Bio/Alphabet @peterjc
+
+Bio/Align/* @peterjc
+Bio/AlignIO/* @peterjc
+Tests/test_AlignIO* @peterjc
+
+Bio/bgzf.py @peterjc
+Tests/test_bgzf*.py @peterjc
+
+Bio/Cluster/* @mdehoon
+Tests/test_Cluster*.py @mdehoon
+
+Bio/codonalign/* @zruan
+Tests/test_codonalign*.py @zruan
+
+Bio/Entrez/* @mdehoon
+Tests/test_Entrez*.py @mdehoon
+
+Bio/GenBank/* @peterjc
+Tests/test_GenBank*.py @peterjc
+Tests/test_EMBL*.py @peterjc
+
+Bio/Graphics/GenomeDiagram/* @widdowquinn @peterjc
+Tests/test_GenomeDiagram*.py @widdowquinn @peterjc
+
+Bio/motifs/* @mdehoon
+Tests/test_motifs*.py @mdehoon
+
+Bio/PDB/* @joaorodrigues @lennax
+Tests/test_PDB*.py @joaorodrigues @lennax
+
+Bio/Phylo/* @etal
+Tests/test_Phylo*.py @etal
+
+Bio/PopGen/* @tiagoantao
+Tests/test_PopGen*.py @tiagoantao
+
+Bio/SearchIO/* @bow
+Tests/test_SearchIO*.py @bow
+
+Bio/Seq*.py @peterjc
+Bio/SeqIO/* @peterjc
+Tests/test_Seq* @peterjc
+Tests/test_seq* @peterjc
+
+Bio/SeqIO/AbiIO.py @peterjc @bow
+Tests/test_SeqIO_AbiIO.py @peterjc @bow
+
+Bio/TogoWS/* @peterjc
+Tests/test_TogoWS*.py @peterjc
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
+### Setup
+
+I am reporting a problem with Biopython version, Python version, and operating
+system as follows:
+
+```python
+import sys; print(sys.version)
+import platform; print(platform.python_implementation()); print(platform.platform())
+import Bio; print(Bio.__version__)
+```
+
+(*Please copy and run the above in your Python, and copy-and-paste the output*)
+
+### Expected behaviour
+
+(*Please fill this in*)
+
+### Actual behaviour
+
+(*Please fill this in, and provide any exception message in full*)
+
+### Steps to reproduce
+
+(*Please fill this in*)
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
+This pull request addresses issue #...
+
+I hereby agree to dual licence this and any previous contributions under both
+the _Biopython License Agreement_ **AND** the _BSD 3-Clause License_.
+
+I have read the ``CONTRIBUTING.rst`` file and understand that AppVeyor and
+TravisCI will be used to confirm the Biopython unit tests and ``flake8`` style
+checks pass with these changes.
+
+I am happy to be thanked by name in the ``NEWS.rst`` and ``CONTRIB.rst`` files,
+and have added myself to those files as part of this pull request. (*This
+acknowledgement is optional. Note we list the names sorted alphabetically.*)
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ build
 dist
 
 #Ignore backup files from some Unix editors,
+\#*.py\#
 *~
 *.swp
 *.bak
@@ -94,3 +95,6 @@ Doc/*/*/hevea.sty
 #Ignore IntelliJ IDEA directory and project files
 .idea
 *.iml
+
+#Ignore unittest cache directory
+.cache/
--- a/.travis-tox.ini
+++ b/.travis-tox.ini
@@ -61,8 +61,8 @@ deps =
    {py27}: mysql-python
    {py27,py36}: mmtf-python
    {py27,py35}: reportlab
-    {py27,py35}: psycopg2
-    {py27,py35,pypy}: mysql-connector-python-rf
+    {py27,py34,py35,py36}: psycopg2-binary
+    {py27,py34,py35,py36}: mysql-connector-python-rf
    {py27,py35,pypy}: rdflib
    {pypy,pypy3}: numpy==1.12.1
    {py27,py34,py35,py36}: numpy
@@ -91,6 +91,7 @@ deps =
    flake8
    flake8-docstrings
    flake8-blind-except
+    flake8-rst-docstrings
    restructuredtext_lint
 commands =
    # These folders each have their own .flake8 file:
@@ -108,6 +109,10 @@ commands =
    bash -c \'grep "1999-`date +'%Y'`" LICENSE.rst\'
    # Check no __docformat__ lines
    bash -c "if grep --include '*.py' -rn '^__docformat__ ' Bio BioSQL Tests Scripts Doc ; then echo 'Remove __docformat__ line(s), we assume restructuredtext.'; false; fi"
+    # Check DOI link style, see https://www.crossref.org/display-guidelines/
+    bash -c "if grep --include '*.py' --include '*.rst' --include '*.tex' -rni 'doi:' Bio BioSQL Scripts Doc ; then echo 'Please use https://doi.org/... not the doi: or DOI: style.'; false; fi"
+    bash -c "if grep --include '*.py' --include '*.rst' --include '*.tex' -rn 'dx\.doi\.org' Bio BioSQL Tests Scripts Doc ; then echo 'Please use https://doi.org/... not the dx.doi.org style.'; false; fi"
+    bash -c "if grep --include '*.py' --include '*.rst' --include '*.tex' -rn 'http://doi\.org' Bio BioSQL Tests Scripts Doc ; then echo 'Please use https://doi.org/... not http://doi.org/...'; false; fi"

 [testenv:sdist]
 # This does not need to install Biopython or any of its dependencies

--- a/.travis.yml
+++ b/.travis.yml
@@ -13,24 +13,27 @@
 # - basics - quick things like style and packaging
 # - test - the actual functional tests which are slow

+dist: trusty
 language: python
 cache: pip
 matrix:
  include:
    - stage: basics
+      python: 2.7
      env: TOXENV=style
      addons:
        apt:
          packages:
      before_install: echo "Going to run basic checks"
    - stage: basics
-      env: TOXENV=sdist
+      python: 3.6
+      env: TOXENV=style
      addons:
        apt:
          packages:
      before_install: echo "Going to run basic checks"
    - stage: basics
-      env: TOXENV=bdist_wheel
+      env: TOXENV=sdist,bdist_wheel
      addons:
        apt:
          packages:
@@ -76,45 +79,68 @@ addons:
 #
 # There is no GenePop Ubuntu pacakge, although it is in BioConda.
 #
-# There are TravisCI provided versions of PyPy and PyPy3, but currently too old.
-# We therefore deactivate that, and download and unzip portable PyPy binaries.
-#
 # We also need DSSP for testing but it is not available in the repositories.
 # Try to download the binary for Linux and place it in $HOME/bin
 #
 # This before_install list is only used for the test stage.
 before_install:
-  - cd $HOME
-  - mkdir bin
+  - pushd $HOME
+  - mkdir -p bin
  - export PATH=$HOME/bin:$PATH
  - echo "Installing PhyML"
  - curl -L -O http://www.atgc-montpellier.fr/download/binaries/phyml/PhyML-3.1.zip
  - unzip PhyML-3.1.zip
  - mv PhyML-3.1/PhyML-3.1_linux64 bin/phyml
-  - cd $HOME
-  - "if [[ $TRAVIS_PYTHON_VERSION == 'pypy' ]]; then deactivate && wget https://bitbucket.org/squeaky/portable-pypy/downloads/pypy-5.7.1-linux_x86_64-portable.tar.bz2 && tar -jxvf pypy-5.7.1-linux_x86_64-portable.tar.bz2 && echo 'Setting up aliases...' && cd pypy-5.7.1-linux_x86_64-portable/bin/ && export PATH=$PWD:$PATH && ln -s pypy python && echo 'Setting up pip...' && ./pypy -m ensurepip ; fi"
-  - "if [[ $TRAVIS_PYTHON_VERSION == 'pypy3' ]]; then deactivate && wget https://bitbucket.org/squeaky/portable-pypy/downloads/pypy3.5-5.8-beta-linux_x86_64-portable.tar.bz2 && tar -jxvf pypy3.5-5.8-beta-linux_x86_64-portable.tar.bz2 && echo 'Setting up aliases...' && cd pypy3.5-5.8-beta-linux_x86_64-portable/bin/ && export PATH=$PWD:$PATH && ln -s pypy3 python && echo 'Setting up pip...' && ./pypy3 -m ensurepip && ln -s pip3 pip ; fi"
-  - cd $HOME
-  - echo "Installing dssp"
-  - curl -L -O ftp://ftp.cmbi.ru.nl/pub/software/dssp/dssp-2.0.4-linux-amd64
-  - mv dssp-2.0.4-linux-amd64 bin/dssp
-  - chmod a+x bin/dssp
+  #- echo "Installing dssp"
+  #- curl -L -O ftp://ftp.cmbi.ru.nl/pub/software/dssp/dssp-2.0.4-linux-amd64
+  #- mv dssp-2.0.4-linux-amd64 bin/dssp
+  #- chmod a+x bin/dssp
  - echo "Installing Genepop"
  - curl -L -O https://anaconda.org/bioconda/genepop/4.5.1/download/linux-64/genepop-4.5.1-0.tar.bz2
  # This will create ./bin/Genepop and a harmless ./info/ folder.
  - tar -jxvf genepop-4.5.1-0.tar.bz2
-  - cd $TRAVIS_BUILD_DIR
-  - "cp Tests/biosql.ini.sample Tests/biosql.ini"
+  # There are TravisCI provided versions of PyPy and PyPy3, but currently too old.
+  # We therefore deactivate that, and download and unzip portable PyPy binaries.
+  - |
+    if [[ $TRAVIS_PYTHON_VERSION == 'pypy' ]]; then
+        deactivate
+        wget https://bitbucket.org/squeaky/portable-pypy/downloads/pypy-5.10.0-linux_x86_64-portable.tar.bz2
+        tar -jxvf pypy-5.10.0-linux_x86_64-portable.tar.bz2
+        echo 'Setting up aliases...'
+        cd pypy-5.10.0-linux_x86_64-portable/bin/
+        export PATH=$PWD:$PATH
+        ln -s pypy python
+        echo 'Setting up pip...'
+        ./pypy -m ensurepip
+    fi
+  - |
+    if [[ $TRAVIS_PYTHON_VERSION == 'pypy3' ]]; then
+        deactivate
+        wget https://bitbucket.org/squeaky/portable-pypy/downloads/pypy3.5-5.10.1-linux_x86_64-portable.tar.bz2
+        tar -jxvf pypy3.5-5.10.1-linux_x86_64-portable.tar.bz2
+        echo 'Setting up aliases...'
+        cd pypy3.5-5.10.1-linux_x86_64-portable/bin/
+        export PATH=$PWD:$PATH
+        ln -s pypy3 python
+        echo 'Setting up pip...'
+        ./pypy3 -m ensurepip
+        ln -s pip3 pip
+    fi
+  - popd
+  - cp Tests/biosql.ini.sample Tests/biosql.ini
+  - psql -c "create database biosql_test;" -U postgres
+  - psql -c "create user biosql_user with encrypted password 'biosql_pass';" -U postgres
+  - psql -c "grant all privileges on database biosql_test to biosql_user;" -U postgres


 # This is minimal and used under all stages
 install:
-  - "pip install --upgrade pip setuptools"
-  - "pip install tox"
-  - "tox -c .travis-tox.ini -e $TOXENV --notest"
+  - pip install --upgrade pip setuptools
+  - pip install tox
+  - tox -c .travis-tox.ini -e $TOXENV --notest

 script:
-  - "tox -c .travis-tox.ini -e $TOXENV"
+  - travis_wait tox -c .travis-tox.ini -e $TOXENV

 notifications:
  email: false
--- a/Bio/.flake8
+++ b/Bio/.flake8
@@ -7,7 +7,18 @@ ignore =
    # =======================
    # pycodestyle v2.3.1 default ignore is E121,E123,E126,E226,E24,E704,W503
    # flake8 v3.3.0 default ignore is      E121,E123,E126,E226,E24,E704,W503,W504
-    E122,E123,E126,E127,E128,E501,E731,F401,F812,F841,
+    #
+    # These are ignored by default:
+    E122,E123,E126,W503,
+    # These are not ignored by default:
+    # E127	continuation line over-indented for visual indent
+    # E128	continuation line under-indented for visual indent
+    # E501	line too long (XX > 79 characters)
+    # E731	do not assign a lambda expression, use a def
+    # F401	module imported but unused
+    # F841	local variable name is assigned to but never used
+    # TODO: Fix some of these?
+    E127,E128,E501,E731,F401,F841,
    # =====================================
    # pydocstyle: D1## - Missing Docstrings
    # =====================================
@@ -15,41 +26,16 @@ ignore =
    # D101	Missing docstring in public class
    # D102	Missing docstring in public method
    # D103	Missing docstring in public function
-    # D104	Missing docstring in public package
    # D105	Missing docstring in magic method
    # TODO: Fix some of these?
-    D100,D101,D102,D103,D104,D105,
+    D100,D101,D102,D103,D105,
    # ====================================
    # pydocstyle: D2## - Whitespace Issues
    # ====================================
-    # D200	One-line docstring should fit on one line with quotes
-    # D202	No blank lines allowed after function docstring
    # D203	1 blank line required before class docstring
-    # D204	1 blank line required after class docstring
-    # D205	1 blank line required between summary line and description
-    # D207	Docstring is under-indented
-    # D208	Docstring is over-indented
-    # D210	No whitespaces allowed surrounding docstring text
-    # TODO: Fix these:
-    D205,
-    # We ignore	D203 deliberately in favour of passing D211,
+    # D211	No blank lines allowed before class docstring
+    # We ignore D203 deliberately in favour of passing D211,
    D203,
-    # ================================
-    # pydocstyle: D3## - Quotes Issues
-    # ================================
-    # D300	Use """triple double quotes"""
-    # D301	Use r""" if any backslashes in a docstring
-    # TODO: Fix this?:
-    D301,
-    # ===========================================
-    # pydocstyle: D4## - Docstring Content Issues
-    # ===========================================
-    # D400	First line should end with a period
-    # D401	First line should be in imperative mood
-    # D402	First line should not be the function’s "signature"
-    # D403	First word of the first line should be properly capitalized
-    # D412	No blank lines allowed between a section header and its content
-    D400,D401,D402,D403,
    # ================================================
    # flake8-commas: C#### (in case installed locally)
    # ================================================

--- a/Bio/Affy/CelFile.py
+++ b/Bio/Affy/CelFile.py
@@ -19,7 +19,10 @@ except ImportError:


 class ParserError(ValueError):
+    """Affymetrix parser error."""
+
    def __init__(self, *args):
+        """Initialize class."""
        super(ParserError, self).__init__(*args)


@@ -33,7 +36,7 @@ _modeError = ParserError("You're trying to open an Affymetrix v4"


 class Record(object):
-    """Stores the information in a cel file
+    """Stores the information in a cel file.

    Example usage:

@@ -65,6 +68,7 @@ class Record(object):
    """

    def __init__(self):
+        """Initialize class."""
        self.version = None
        self.GridCornerUL = None
        self.GridCornerUR = None
@@ -87,7 +91,7 @@ class Record(object):


 def read(handle):
-    """Reads Affymetrix CEL file and returns Record object.
+    """Read Affymetrix CEL file and return Record object.

    CEL files version 3 and 4 are supported, and the parser attempts version detection.

@@ -99,6 +103,7 @@ def read(handle):
    ...
    >>> c.version == 4
    True
+
    """
    # If we fail to read the magic number, then it will remain None, and thus
    # we will invoke read_v3 (if mode is not strict), or raise IOError if mode
@@ -135,8 +140,7 @@ def read(handle):

 # read Affymetrix files version 4.
 def read_v4(f):
-    """Reads Affymetrix CEL file, version 4, and returns a corresponding Record
-    object.
+    """Read version 4 Affymetrix CEL file, return corresponding Record object.

    Most importantly record.intensities correspond to intensities from the CEL
    file.
@@ -151,8 +155,9 @@ def read_v4(f):
    ...
    >>> c.version == 4
    True
-    >>> print(c.intensities.shape)
-    (5, 5)
+    >>> print("%i by %i array" % c.intensities.shape)
+    5 by 5 array
+
    """
    # We follow the documentation here:
    # http://www.affymetrix.com/estore/support/developer/powertools/changelog/gcos-agcc/cel.html.affx
@@ -278,7 +283,7 @@ def read_v4(f):


 def read_v3(handle):
-    """Reads Affymetrix CEL file, version 3, and returns a corresponding Record object.
+    """Read version 3 Affymetrix CEL file, and return corresponding Record object.

    Example Usage:

@@ -288,6 +293,7 @@ def read_v3(handle):
    ...
    >>> c.version == 3
    True
+
    """
    # Needs error handling.
    # Needs to know the chip design.

--- a/Bio/Align/Applications/_ClustalOmega.py
+++ b/Bio/Align/Applications/_ClustalOmega.py
@@ -20,9 +20,20 @@ class ClustalOmegaCommandline(AbstractCommandline):

    http://www.clustal.org/omega

-    Example:
-    --------
+    Notes
+    -----
+    Last checked against version: 1.2.0

+    References
+    ----------
+    Sievers F, Wilm A, Dineen DG, Gibson TJ, Karplus K, Li W, Lopez R,
+    McWilliam H, Remmert M, Söding J, Thompson JD, Higgins DG (2011).
+    Fast, scalable generation of high-quality protein multiple
+    sequence alignments using Clustal Omega.
+    Molecular Systems Biology 7:539 https://doi.org/10.1038/msb.2011.75
+
+    Examples
+    --------
    >>> from Bio.Align.Applications import ClustalOmegaCommandline
    >>> in_file = "unaligned.fasta"
    >>> out_file = "aligned.fasta"
@@ -30,23 +41,13 @@ class ClustalOmegaCommandline(AbstractCommandline):
    >>> print(clustalomega_cline)
    clustalo -i unaligned.fasta -o aligned.fasta --auto -v

-
    You would typically run the command line with clustalomega_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

-    Citation:
-    ---------
-
-    Sievers F, Wilm A, Dineen DG, Gibson TJ, Karplus K, Li W, Lopez R,
-    McWilliam H, Remmert M, Söding J, Thompson JD, Higgins DG (2011).
-    Fast, scalable generation of high-quality protein multiple
-    sequence alignments using Clustal Omega.
-    Molecular Systems Biology 7:539 doi:10.1038/msb.2011.75
-
-    Last checked against versions: 1.2.0
    """

    def __init__(self, cmd="clustalo", **kwargs):
+        """Initialize the class."""
        # order parameters in the same order as clustalo --help
        self.parameters = \
            [

--- a/Bio/Align/Applications/_Clustalw.py
+++ b/Bio/Align/Applications/_Clustalw.py
@@ -15,9 +15,19 @@ class ClustalwCommandline(AbstractCommandline):

    http://www.clustal.org/

-    Example:
-    --------
+    Notes
+    -----
+    Last checked against versions: 1.83 and 2.1
+
+    References
+    ----------
+    Larkin MA, Blackshields G, Brown NP, Chenna R, McGettigan PA,
+    McWilliam H, Valentin F, Wallace IM, Wilm A, Lopez R, Thompson JD,
+    Gibson TJ, Higgins DG. (2007). Clustal W and Clustal X version 2.0.
+    Bioinformatics, 23, 2947-2948.

+    Examples
+    --------
    >>> from Bio.Align.Applications import ClustalwCommandline
    >>> in_file = "unaligned.fasta"
    >>> clustalw_cline = ClustalwCommandline("clustalw2", infile=in_file)
@@ -27,21 +37,12 @@ class ClustalwCommandline(AbstractCommandline):
    You would typically run the command line with clustalw_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

-    Citation:
-    ---------
-
-    Larkin MA, Blackshields G, Brown NP, Chenna R, McGettigan PA,
-    McWilliam H, Valentin F, Wallace IM, Wilm A, Lopez R, Thompson JD,
-    Gibson TJ, Higgins DG. (2007). Clustal W and Clustal X version 2.0.
-    Bioinformatics, 23, 2947-2948.
-
-    Last checked against versions: 1.83 and 2.1
    """

    # TODO - Should we default to cmd="clustalw2" now?
    def __init__(self, cmd="clustalw", **kwargs):
-        self.parameters = \
-            [
+        """Initialize the class."""
+        self.parameters = [
            _Option(["-infile", "-INFILE", "INFILE", "infile"],
                    "Input sequences.",
                    filename=True),
@@ -127,20 +128,20 @@ class ClustalwCommandline(AbstractCommandline):
            # ***Fast Pairwise Alignments:***
            _Option(["-ktuple", "-KTUPLE", "KTUPLE", "ktuple"],
                    "Word size",
-                    checker_function=lambda x: isinstance(x, int) or
-                                               isinstance(x, float)),
+                    checker_function=lambda x: (isinstance(x, int) or
+                                                isinstance(x, float))),
            _Option(["-topdiags", "-TOPDIAGS", "TOPDIAGS", "topdiags"],
                    "Number of best diags.",
-                    checker_function=lambda x: isinstance(x, int) or
-                                               isinstance(x, float)),
+                    checker_function=lambda x: (isinstance(x, int) or
+                                                isinstance(x, float))),
            _Option(["-window", "-WINDOW", "WINDOW", "window"],
                    "Window around best diags.",
-                    checker_function=lambda x: isinstance(x, int) or
-                                               isinstance(x, float)),
+                    checker_function=lambda x: (isinstance(x, int) or
+                                                isinstance(x, float))),
            _Option(["-pairgap", "-PAIRGAP", "PAIRGAP", "pairgap"],
                    "Gap penalty",
-                    checker_function=lambda x: isinstance(x, int) or
-                                               isinstance(x, float)),
+                    checker_function=lambda x: (isinstance(x, int) or
+                                                isinstance(x, float))),
            _Option(["-score", "-SCORE", "SCORE", "score"],
                    "Either: PERCENT or ABSOLUTE",
                    checker_function=lambda x: x in ["percent", "PERCENT",
@@ -148,26 +149,26 @@ class ClustalwCommandline(AbstractCommandline):
            # ***Slow Pairwise Alignments:***
            _Option(["-pwmatrix", "-PWMATRIX", "PWMATRIX", "pwmatrix"],
                    "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
-                    checker_function=lambda x: x in ["BLOSUM", "PAM",
-                                                     "GONNET", "ID",
-                                                     "blosum", "pam",
-                                                     "gonnet", "id"] or
-                                                os.path.exists(x),
+                    checker_function=lambda x: (x in ["BLOSUM", "PAM",
+                                                      "GONNET", "ID",
+                                                      "blosum", "pam",
+                                                      "gonnet", "id"]
+                                                or os.path.exists(x)),
                    filename=True),
            _Option(["-pwdnamatrix", "-PWDNAMATRIX", "PWDNAMATRIX", "pwdnamatrix"],
                    "DNA weight matrix=IUB, CLUSTALW or filename",
-                    checker_function=lambda x: x in ["IUB", "CLUSTALW",
-                                                     "iub", "clustalw"] or
-                                               os.path.exists(x),
+                    checker_function=lambda x: (x in ["IUB", "CLUSTALW",
+                                                      "iub", "clustalw"]
+                                                or os.path.exists(x)),
                    filename=True),
            _Option(["-pwgapopen", "-PWGAPOPEN", "PWGAPOPEN", "pwgapopen"],
                    "Gap opening penalty",
-                    checker_function=lambda x: isinstance(x, int) or
-                                               isinstance(x, float)),
+                    checker_function=lambda x: (isinstance(x, int) or
+                                                isinstance(x, float))),
            _Option(["-pwgapext", "-PWGAPEXT", "PWGAPEXT", "pwgapext"],
                    "Gap extension penalty",
-                    checker_function=lambda x: isinstance(x, int) or
-                                               isinstance(x, float)),
+                    checker_function=lambda x: (isinstance(x, int) or
+                                                isinstance(x, float))),
            # ***Multiple Alignments:***
            _Option(["-newtree", "-NEWTREE", "NEWTREE", "newtree"],
                    "Output file name for newly created guide tree",
@@ -178,32 +179,32 @@ class ClustalwCommandline(AbstractCommandline):
                    filename=True),
            _Option(["-matrix", "-MATRIX", "MATRIX", "matrix"],
                    "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
-                    checker_function=lambda x: x in ["BLOSUM", "PAM",
-                                                     "GONNET", "ID",
-                                                     "blosum", "pam",
-                                                     "gonnet", "id"] or
-                                               os.path.exists(x),
+                    checker_function=lambda x: (x in ["BLOSUM", "PAM",
+                                                      "GONNET", "ID",
+                                                      "blosum", "pam",
+                                                      "gonnet", "id"]
+                                                or os.path.exists(x)),
                    filename=True),
            _Option(["-dnamatrix", "-DNAMATRIX", "DNAMATRIX", "dnamatrix"],
                    "DNA weight matrix=IUB, CLUSTALW or filename",
-                    checker_function=lambda x: x in ["IUB", "CLUSTALW",
-                                                     "iub", "clustalw"] or
-                                               os.path.exists(x),
+                    checker_function=lambda x: (x in ["IUB", "CLUSTALW",
+                                                      "iub", "clustalw"]
+                                                or os.path.exists(x)),
                    filename=True),
            _Option(["-gapopen", "-GAPOPEN", "GAPOPEN", "gapopen"],
                    "Gap opening penalty",
-                    checker_function=lambda x: isinstance(x, int) or
-                                               isinstance(x, float)),
+                    checker_function=lambda x: (isinstance(x, int) or
+                                                isinstance(x, float))),
            _Option(["-gapext", "-GAPEXT", "GAPEXT", "gapext"],
                    "Gap extension penalty",
-                    checker_function=lambda x: isinstance(x, int) or
-                                               isinstance(x, float)),
+                    checker_function=lambda x: (isinstance(x, int) or
+                                                isinstance(x, float))),
            _Switch(["-endgaps", "-ENDGAPS", "ENDGAPS", "endgaps"],
                    "No end gap separation pen."),
            _Option(["-gapdist", "-GAPDIST", "GAPDIST", "gapdist"],
                    "Gap separation pen. range",
-                    checker_function=lambda x: isinstance(x, int) or
-                                               isinstance(x, float)),
+                    checker_function=lambda x: (isinstance(x, int) or
+                                                isinstance(x, float))),
            _Switch(["-nopgap", "-NOPGAP", "NOPGAP", "nopgap"],
                    "Residue-specific gaps off"),
            _Switch(["-nohgap", "-NOHGAP", "NOHGAP", "nohgap"],
@@ -212,8 +213,8 @@ class ClustalwCommandline(AbstractCommandline):
                    "List hydrophilic res."),
            _Option(["-maxdiv", "-MAXDIV", "MAXDIV", "maxdiv"],
                    "% ident. for delay",
-                    checker_function=lambda x: isinstance(x, int) or
-                                               isinstance(x, float)),
+                    checker_function=lambda x: (isinstance(x, int) or
+                                                isinstance(x, float))),
            # Already handled in General Settings section, but appears a second
            # time under Multiple Alignments in the help
            # _Option(["-type", "-TYPE", "TYPE", "type"],
@@ -222,8 +223,8 @@ class ClustalwCommandline(AbstractCommandline):
            #                                         "protein", "dna"]),
            _Option(["-transweight", "-TRANSWEIGHT", "TRANSWEIGHT", "transweight"],
                    "Transitions weighting",
-                    checker_function=lambda x: isinstance(x, int) or
-                                               isinstance(x, float)),
+                    checker_function=lambda x: (isinstance(x, int) or
+                                                isinstance(x, float))),
            _Option(["-iteration", "-ITERATION", "ITERATION", "iteration"],
                    "NONE or TREE or ALIGNMENT",
                    checker_function=lambda x: x in ["NONE", "TREE",
@@ -277,20 +278,20 @@ class ClustalwCommandline(AbstractCommandline):
                                                     "both", "none"]),
            _Option(["-helixgap", "-HELIXGAP", "HELIXGAP", "helixgap"],
                    "Gap penalty for helix core residues",
-                    checker_function=lambda x: isinstance(x, int) or
-                                               isinstance(x, float)),
+                    checker_function=lambda x: (isinstance(x, int) or
+                                                isinstance(x, float))),
            _Option(["-strandgap", "-STRANDGAP", "STRANDGAP", "strandgap"],
                    "gap penalty for strand core residues",
-                    checker_function=lambda x: isinstance(x, int) or
-                                               isinstance(x, float)),
+                    checker_function=lambda x: (isinstance(x, int) or
+                                                isinstance(x, float))),
            _Option(["-loopgap", "-LOOPGAP", "LOOPGAP", "loopgap"],
                    "Gap penalty for loop regions",
-                    checker_function=lambda x: isinstance(x, int) or
-                                               isinstance(x, float)),
+                    checker_function=lambda x: (isinstance(x, int) or
+                                                isinstance(x, float))),
            _Option(["-terminalgap", "-TERMINALGAP", "TERMINALGAP", "terminalgap"],
                    "Gap penalty for structure termini",
-                    checker_function=lambda x: isinstance(x, int) or
-                                               isinstance(x, float)),
+                    checker_function=lambda x: (isinstance(x, int) or
+                                                isinstance(x, float))),
            _Option(["-helixendin", "-HELIXENDIN", "HELIXENDIN", "helixendin"],
                    "Number of residues inside helix to be treated as terminal",
                    checker_function=lambda x: isinstance(x, int)),

--- a/Bio/Align/Applications/_Dialign.py
+++ b/Bio/Align/Applications/_Dialign.py
@@ -15,9 +15,17 @@ class DialignCommandline(AbstractCommandline):

    http://bibiserv.techfak.uni-bielefeld.de/dialign/welcome.html

-    Example:
-    --------
+    Notes
+    -----
+    Last checked against version: 2.2
+
+    References
+    ----------
+    B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence
+    Alignment at BiBiServ. Nucleic Acids Research 32, W33-W36.

+    Examples
+    --------
    To align a FASTA file (unaligned.fasta) with the output files names
    aligned.* including a FASTA output file (aligned.fa), use:

@@ -30,16 +38,10 @@ class DialignCommandline(AbstractCommandline):
    You would typically run the command line with dialign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

-    Citation:
-    ---------
-
-    B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence
-    Alignment at BiBiServ. Nucleic Acids Research 32, W33-W36.
-
-    Last checked against version: 2.2
    """

    def __init__(self, cmd="dialign2-2", **kwargs):
+        """Initialize the class."""
        self.program_name = cmd
        self.parameters = \
            [

--- a/Bio/Align/Applications/_MSAProbs.py
+++ b/Bio/Align/Applications/_MSAProbs.py
@@ -16,9 +16,18 @@ class MSAProbsCommandline(AbstractCommandline):

    http://msaprobs.sourceforge.net

-    Example:
-    --------
+    Notes
+    -----
+    Last checked against version: 0.9.7

+    References
+    ----------
+    Yongchao Liu, Bertil Schmidt, Douglas L. Maskell: "MSAProbs: multiple
+    sequence alignment based on pair hidden Markov models and partition
+    function posterior probabilities". Bioinformatics, 2010, 26(16): 1958-1964
+
+    Examples
+    --------
    >>> from Bio.Align.Applications import MSAProbsCommandline
    >>> in_file = "unaligned.fasta"
    >>> out_file = "aligned.cla"
@@ -29,17 +38,10 @@ class MSAProbsCommandline(AbstractCommandline):
    You would typically run the command line with cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

-    Citation:
-    ---------
-
-    Yongchao Liu, Bertil Schmidt, Douglas L. Maskell: "MSAProbs: multiple
-    sequence alignment based on pair hidden Markov models and partition
-    function posterior probabilities". Bioinformatics, 2010, 26(16): 1958 -1964
-
-    Last checked against version: 0.9.7
    """

    def __init__(self, cmd="msaprobs", **kwargs):
+        """Initialize the class."""
        # order of parameters is the same as in msaprobs -help
        self.parameters = \
            [

--- a/Bio/Align/Applications/_Mafft.py
+++ b/Bio/Align/Applications/_Mafft.py
@@ -6,8 +6,6 @@

 from __future__ import print_function

-
-import os
 from Bio.Application import _Option, _Switch, _Argument, AbstractCommandline


@@ -16,9 +14,33 @@ class MafftCommandline(AbstractCommandline):

    http://align.bmr.kyushu-u.ac.jp/mafft/software/

-    Example:
-    --------
+    Notes
+    -----
+    Last checked against version: MAFFT v6.717b (2009/12/03)
+
+    References
+    ----------
+    Katoh, Toh (BMC Bioinformatics 9:212, 2008) Improved accuracy of
+    multiple ncRNA alignment by incorporating structural information into
+    a MAFFT-based framework (describes RNA structural alignment methods)
+
+    Katoh, Toh (Briefings in Bioinformatics 9:286-298, 2008) Recent
+    developments in the MAFFT multiple sequence alignment program
+    (outlines version 6)

+    Katoh, Toh (Bioinformatics 23:372-374, 2007)  Errata PartTree: an
+    algorithm to build an approximate tree from a large number of
+    unaligned sequences (describes the PartTree algorithm)
+
+    Katoh, Kuma, Toh, Miyata (Nucleic Acids Res. 33:511-518, 2005) MAFFT
+    version 5: improvement in accuracy of multiple sequence alignment
+    (describes [ancestral versions of] the G-INS-i, L-INS-i and E-INS-i
+    strategies)
+
+    Katoh, Misawa, Kuma, Miyata (Nucleic Acids Res. 30:3059-3066, 2002)
+
+    Examples
+    --------
    >>> from Bio.Align.Applications import MafftCommandline
    >>> mafft_exe = "/opt/local/mafft"
    >>> in_file = "../Doc/examples/opuntia.fasta"
@@ -37,6 +59,7 @@ class MafftCommandline(AbstractCommandline):

    You would typically run the command line with mafft_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.
+
    Note that MAFFT will write the alignment to stdout, which you may
    want to save to a file and then parse, e.g.::

@@ -54,32 +77,10 @@ class MafftCommandline(AbstractCommandline):
        from Bio import AlignIO
        align = AlignIO.read(StringIO(stdout), "fasta")

-    Citations:
-    ----------
-
-    Katoh, Toh (BMC Bioinformatics 9:212, 2008) Improved accuracy of
-    multiple ncRNA alignment by incorporating structural information into
-    a MAFFT-based framework (describes RNA structural alignment methods)
-
-    Katoh, Toh (Briefings in Bioinformatics 9:286-298, 2008) Recent
-    developments in the MAFFT multiple sequence alignment program
-    (outlines version 6)
-
-    Katoh, Toh (Bioinformatics 23:372-374, 2007)  Errata PartTree: an
-    algorithm to build an approximate tree from a large number of
-    unaligned sequences (describes the PartTree algorithm)
-
-    Katoh, Kuma, Toh, Miyata (Nucleic Acids Res. 33:511-518, 2005) MAFFT
-    version 5: improvement in accuracy of multiple sequence alignment
-    (describes [ancestral versions of] the G-INS-i, L-INS-i and E-INS-i
-    strategies)
-
-    Katoh, Misawa, Kuma, Miyata (Nucleic Acids Res. 30:3059-3066, 2002)
-
-    Last checked against version: MAFFT v6.717b (2009/12/03)
    """

    def __init__(self, cmd="mafft", **kwargs):
+        """Initialize the class."""
        BLOSUM_MATRICES = ["30", "45", "62", "80"]
        self.parameters = \
            [

--- a/Bio/Align/Applications/_Muscle.py
+++ b/Bio/Align/Applications/_Muscle.py
@@ -14,9 +14,20 @@ class MuscleCommandline(AbstractCommandline):

    http://www.drive5.com/muscle/

-    Example:
-    --------
+    Notes
+    -----
+    Last checked against version: 3.7, briefly against 3.8
+
+    References
+    ----------
+    Edgar, Robert C. (2004), MUSCLE: multiple sequence alignment with high
+    accuracy and high throughput, Nucleic Acids Research 32(5), 1792-97.

+    Edgar, R.C. (2004) MUSCLE: a multiple sequence alignment method with
+    reduced time and space complexity. BMC Bioinformatics 5(1): 113.
+
+    Examples
+    --------
    >>> from Bio.Align.Applications import MuscleCommandline
    >>> muscle_exe = r"C:\Program Files\Aligments\muscle3.8.31_i86win32.exe"
    >>> in_file = r"C:\My Documents\unaligned.fasta"
@@ -28,19 +39,10 @@ class MuscleCommandline(AbstractCommandline):
    You would typically run the command line with muscle_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

-    Citations:
-    ----------
-
-    Edgar, Robert C. (2004), MUSCLE: multiple sequence alignment with high
-    accuracy and high throughput, Nucleic Acids Research 32(5), 1792-97.
-
-    Edgar, R.C. (2004) MUSCLE: a multiple sequence alignment method with
-    reduced time and space complexity. BMC Bioinformatics 5(1): 113.
-
-    Last checked against version: 3.7, briefly against 3.8
    """

    def __init__(self, cmd="muscle", **kwargs):
+        """Initialize the class."""
        CLUSTERING_ALGORITHMS = ["upgma", "upgmb", "neighborjoining"]
        DISTANCE_MEASURES_ITER1 = ["kmer6_6", "kmer20_3", "kmer20_4",
                                   "kbit20_3", "kmer4_6"]

--- a/Bio/Align/Applications/_Prank.py
+++ b/Bio/Align/Applications/_Prank.py
@@ -15,9 +15,22 @@ class PrankCommandline(AbstractCommandline):

    http://www.ebi.ac.uk/goldman-srv/prank/prank/

-    Example:
-    --------
+    Notes
+    -----
+    Last checked against version: 081202
+
+    References
+    ----------
+    Loytynoja, A. and Goldman, N. 2005. An algorithm for progressive
+    multiple alignment of sequences with insertions. Proceedings of
+    the National Academy of Sciences, 102: 10557--10562.
+
+    Loytynoja, A. and Goldman, N. 2008. Phylogeny-aware gap placement
+    prevents errors in sequence alignment and evolutionary analysis.
+    Science, 320: 1632.

+    Examples
+    --------
    To align a FASTA file (unaligned.fasta) with the output in aligned
    FASTA format with the output filename starting with "aligned" (you
    can't pick the filename explicitly), no tree output and no XML output,
@@ -34,21 +47,10 @@ class PrankCommandline(AbstractCommandline):
    You would typically run the command line with prank_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

-    Citations:
-    ----------
-
-    Loytynoja, A. and Goldman, N. 2005. An algorithm for progressive
-    multiple alignment of sequences with insertions. Proceedings of
-    the National Academy of Sciences, 102: 10557--10562.
-
-    Loytynoja, A. and Goldman, N. 2008. Phylogeny-aware gap placement
-    prevents errors in sequence alignment and evolutionary analysis.
-    Science, 320: 1632.
-
-    Last checked against version: 081202
    """

    def __init__(self, cmd="prank", **kwargs):
+        """Initialize the class."""
        OUTPUT_FORMAT_VALUES = list(range(1, 18))
        self.parameters = [
            # ################# input/output parameters: ##################
@@ -160,14 +162,14 @@ class PrankCommandline(AbstractCommandline):
            # Doesn't specify type but Float and Int work
            _Option(["-matresize", "matresize"],
                    "Matrix resizing multiplier",
-                    checker_function=lambda x: isinstance(x, float) or
-                                               isinstance(x, int)),
+                    checker_function=lambda x: (isinstance(x, float) or
+                                                isinstance(x, int))),
            # -matinitsize=# [matrix initial size multiplier]
            # Doesn't specify type but Float and Int work
            _Option(["-matinitsize", "matinitsize"],
                    "Matrix initial size multiplier",
-                    checker_function=lambda x: isinstance(x, float) or
-                                               isinstance(x, int)),
+                    checker_function=lambda x: (isinstance(x, float) or
+                                                isinstance(x, int))),
            _Switch(["-longseq", "longseq"],
                    "Save space in pairwise alignments"),
            _Switch(["-pwgenomic", "pwgenomic"],

--- a/Bio/Align/Applications/_Probcons.py
+++ b/Bio/Align/Applications/_Probcons.py
@@ -15,9 +15,18 @@ class ProbconsCommandline(AbstractCommandline):

    http://probcons.stanford.edu/

-    Example:
-    --------
+    Notes
+    -----
+    Last checked against version: 1.12
+
+    References
+    ----------
+    Do, C.B., Mahabhashyam, M.S.P., Brudno, M., and Batzoglou, S. 2005.
+    PROBCONS: Probabilistic Consistency-based Multiple Sequence Alignment.
+    Genome Research 15: 330-340.

+    Examples
+    --------
    To align a FASTA file (unaligned.fasta) with the output in ClustalW
    format, and otherwise default settings, use:

@@ -29,6 +38,7 @@ class ProbconsCommandline(AbstractCommandline):

    You would typically run the command line with probcons_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.
+
    Note that PROBCONS will write the alignment to stdout, which you may
    want to save to a file and then parse, e.g.::

@@ -46,17 +56,10 @@ class ProbconsCommandline(AbstractCommandline):
        from Bio import AlignIO
        align = AlignIO.read(StringIO(stdout), "clustalw")

-    Citations:
-    ----------
-
-    Do, C.B., Mahabhashyam, M.S.P., Brudno, M., and Batzoglou, S. 2005.
-    PROBCONS: Probabilistic Consistency-based Multiple Sequence Alignment.
-    Genome Research 15: 330-340.
-
-    Last checked against version: 1.12
    """

    def __init__(self, cmd="probcons", **kwargs):
+        """Initialize the class."""
        self.parameters = \
            [
            # Note that some options cannot be assigned via properties using the

--- a/Bio/Align/Applications/_TCoffee.py
+++ b/Bio/Align/Applications/_TCoffee.py
@@ -18,9 +18,17 @@ class TCoffeeCommandline(AbstractCommandline):
    This wrapper implements a VERY limited number of options - if you
    would like to help improve it please get in touch.

-    Example:
-    --------
+    Notes
+    -----
+    Last checked against: Version_6.92
+
+    References
+    ----------
+    T-Coffee: A novel method for multiple sequence alignments.
+    Notredame, Higgins, Heringa, JMB,302(205-217) 2000

+    Examples
+    --------
    To align a FASTA file (unaligned.fasta) with the output in ClustalW
    format (file aligned.aln), and otherwise default settings, use:

@@ -34,18 +42,12 @@ class TCoffeeCommandline(AbstractCommandline):
    You would typically run the command line with tcoffee_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

-    Citation:
-    ---------
-
-    T-Coffee: A novel method for multiple sequence alignments.
-    Notredame, Higgins, Heringa, JMB,302(205-217) 2000
-
-    Last checked against: Version_6.92
    """

    SEQ_TYPES = ["dna", "protein", "dna_protein"]

    def __init__(self, cmd="t_coffee", **kwargs):
+        """Initialize the class."""
        self.parameters = [
            _Option(["-output", "output"],
                    """Specify the output type.

--- a/Bio/Align/__init__.py
+++ b/Bio/Align/__init__.py
@@ -12,7 +12,7 @@ class, used in the Bio.AlignIO module.
 from __future__ import print_function

 from Bio.Seq import Seq
-from Bio.SeqRecord import SeqRecord
+from Bio.SeqRecord import SeqRecord, _RestrictedDict
 from Bio import Alphabet


@@ -103,7 +103,7 @@ class MultipleSeqAlignment(object):
    """

    def __init__(self, records, alphabet=None,
-                 annotations=None):
+                 annotations=None, column_annotations=None):
        """Initialize a new MultipleSeqAlignment object.

        Arguments:
@@ -115,6 +115,10 @@ class MultipleSeqAlignment(object):
                      record alphabets.  If omitted, a consensus alphabet is
                      used.
         - annotations - Information about the whole alignment (dictionary).
+         - column_annotations - Per column annotation (restricted dictionary).
+                      This holds Python sequences (lists, strings, tuples)
+                      whose length matches the number of columns. A typical
+                      use would be a secondary structure consensus string.

        You would normally load a MSA from a file using Bio.AlignIO, but you
        can do this from a list of SeqRecord objects too:
@@ -126,7 +130,9 @@ class MultipleSeqAlignment(object):
        >>> a = SeqRecord(Seq("AAAACGT", generic_dna), id="Alpha")
        >>> b = SeqRecord(Seq("AAA-CGT", generic_dna), id="Beta")
        >>> c = SeqRecord(Seq("AAAAGGT", generic_dna), id="Gamma")
-        >>> align = MultipleSeqAlignment([a, b, c], annotations={"tool": "demo"})
+        >>> align = MultipleSeqAlignment([a, b, c],
+        ...                              annotations={"tool": "demo"},
+        ...                              column_annotations={"stats": "CCCXCCC"})
        >>> print(align)
        DNAAlphabet() alignment with 3 rows and 7 columns
        AAAACGT Alpha
@@ -134,6 +140,8 @@ class MultipleSeqAlignment(object):
        AAAAGGT Gamma
        >>> align.annotations
        {'tool': 'demo'}
+        >>> align.column_annotations
+        {'stats': 'CCCXCCC'}

        NOTE - The older Bio.Align.Generic.Alignment class only accepted a
        single argument, an alphabet.  This is still supported via a backwards
@@ -180,6 +188,45 @@ class MultipleSeqAlignment(object):
            raise TypeError("annotations argument should be a dict")
        self.annotations = annotations

+        # Annotations about each column of the alignment
+        if column_annotations is None:
+            column_annotations = {}
+        # Handle this via the property set function which will validate it
+        self.column_annotations = column_annotations
+
+    def _set_per_column_annotations(self, value):
+        if not isinstance(value, dict):
+            raise TypeError("The per-column-annotations should be a "
+                            "(restricted) dictionary.")
+        # Turn this into a restricted-dictionary (and check the entries)
+        if len(self):
+            # Use the standard method to get the length
+            expected_length = self.get_alignment_length()
+            self._per_col_annotations = _RestrictedDict(length=expected_length)
+            self._per_col_annotations.update(value)
+        else:
+            # Bit of a problem case... number of columns is undefined
+            self._per_col_annotations = None
+            if value:
+                raise ValueError("Can't set per-column-annotations without an alignment")
+
+    def _get_per_column_annotations(self):
+        if self._per_col_annotations is None:
+            # This happens if empty at initialisation
+            if len(self):
+                # Use the standard method to get the length
+                expected_length = self.get_alignment_length()
+            else:
+                # Should this raise an exception? Compare SeqRecord behaviour...
+                expected_length = 0
+            self._per_col_annotations = _RestrictedDict(length=expected_length)
+        return self._per_col_annotations
+
+    column_annotations = property(
+        fget=_get_per_column_annotations,
+        fset=_set_per_column_annotations,
+        doc="""Dictionary of per-column-annotation for the alignment.""")
+
    def _str_line(self, record, length=50):
        """Return a truncated string representation of a SeqRecord (PRIVATE).

@@ -398,7 +445,7 @@ class MultipleSeqAlignment(object):
              By default, all sequences have the same weight. (0.0 =>
              no weight, 1.0 => highest weight)

-        In general providing a SeqRecord and calling .append is prefered.
+        In general providing a SeqRecord and calling .append is preferred.
        """
        new_seq = Seq(sequence, self._alphabet)

@@ -481,6 +528,9 @@ class MultipleSeqAlignment(object):
                return
            expected_length = len(rec)
            self._append(rec, expected_length)
+            # Can now set up the per-column-annotations as well (they were
+            # left as None while the alignment length was still unknown):
+            self.column_annotations = {}
            # Now continue to the rest of the records as usual

        for rec in records:
@@ -573,9 +623,11 @@ class MultipleSeqAlignment(object):
        >>> b2 = SeqRecord(Seq("GT", generic_dna), id="Beta")
        >>> c2 = SeqRecord(Seq("GT", generic_dna), id="Gamma")
        >>> left = MultipleSeqAlignment([a1, b1, c1],
-        ...                             annotations={"tool": "demo", "name": "start"})
+        ...                             annotations={"tool": "demo", "name": "start"},
+        ...                             column_annotations={"stats": "CCCXC"})
        >>> right = MultipleSeqAlignment([a2, b2, c2],
-        ...                             annotations={"tool": "demo", "name": "end"})
+        ...                             annotations={"tool": "demo", "name": "end"},
+        ...                             column_annotations={"stats": "CC"})

        Now, let's look at these two alignments:

@@ -621,6 +673,11 @@ class MultipleSeqAlignment(object):
        >>> combined.annotations
        {'tool': 'demo'}

+        Similarly any common per-column-annotations are combined:
+
+        >>> combined.column_annotations
+        {'stats': 'CCCXCCC'}
+
        """
        if not isinstance(other, MultipleSeqAlignment):
            raise NotImplementedError
@@ -634,7 +691,11 @@ class MultipleSeqAlignment(object):
        for k, v in self.annotations.items():
            if k in other.annotations and other.annotations[k] == v:
                annotations[k] = v
-        return MultipleSeqAlignment(merged, alpha, annotations)
+        column_annotations = dict()
+        for k, v in self.column_annotations.items():
+            if k in other.column_annotations:
+                column_annotations[k] = v + other.column_annotations[k]
+        return MultipleSeqAlignment(merged, alpha, annotations, column_annotations)

    def __getitem__(self, index):
        """Access part of the alignment.
@@ -754,7 +815,13 @@ class MultipleSeqAlignment(object):
            return self._records[index]
        elif isinstance(index, slice):
            # e.g. sub_align = align[i:j:k]
-            return MultipleSeqAlignment(self._records[index], self._alphabet)
+            new = MultipleSeqAlignment(self._records[index], self._alphabet)
+            if self.column_annotations and len(new) == len(self):
+                # All rows kept (although could have been reversed)
+                # Preserve the column annotations too.
+                for k, v in self.column_annotations.items():
+                    new.column_annotations[k] = v
+            return new
        elif len(index) != 2:
            raise TypeError("Invalid index type.")

@@ -768,8 +835,14 @@ class MultipleSeqAlignment(object):
            return "".join(rec[col_index] for rec in self._records[row_index])
        else:
            # e.g. sub_align = align[1:4, 5:7], gives another alignment
-            return MultipleSeqAlignment((rec[col_index] for rec in self._records[row_index]),
-                                        self._alphabet)
+            new = MultipleSeqAlignment((rec[col_index] for rec in self._records[row_index]),
+                                       self._alphabet)
+            if self.column_annotations and len(new) == len(self):
+                # All rows kept (although could have been reversed)
+                # Preserve the column annotations too (sliced by column).
+                for k, v in self.column_annotations.items():
+                    new.column_annotations[k] = v[col_index]
+            return new

    def sort(self, key=None, reverse=False):
        """Sort the rows (SeqRecord objects) of the alignment in place.

--- a/Bio/AlignIO/ClustalIO.py
+++ b/Bio/AlignIO/ClustalIO.py
@@ -49,6 +49,14 @@ class ClustalWriter(SequentialAlignmentWriter):
        if max_length <= 0:
            raise ValueError("Non-empty sequences are required")

+        if "clustal_consensus" in alignment.column_annotations:
+            star_info = alignment.column_annotations["clustal_consensus"]
+        elif hasattr(alignment, "_star_info"):
+            # This was originally stored by Bio.Clustalw as ._star_info
+            star_info = alignment._star_info
+        else:
+            star_info = None
+
        # keep displaying sequences until we reach the end
        while cur_char != max_length:
            # calculate the number of sequences to show, which will
@@ -70,10 +78,8 @@ class ClustalWriter(SequentialAlignmentWriter):
                output += line + "\n"

            # now we need to print out the star info, if we've got it
-            # This was stored by Bio.Clustalw using a ._star_info property.
-            if hasattr(alignment, "_star_info") and alignment._star_info != '':
-                output += (" " * 36) + \
-                    alignment._star_info[cur_char:(cur_char + show_num)] + "\n"
+            if star_info:
+                output += (" " * 36) + star_info[cur_char:(cur_char + show_num)] + "\n"

            output += "\n"
            cur_char += show_num
@@ -284,5 +290,7 @@ class ClustalIterator(AlignmentIterator):
            assert len(consensus) == alignment_length, \
                "Alignment length is %i, consensus length is %i, '%s'" \
                % (alignment_length, len(consensus), consensus)
+            alignment.column_annotations["clustal_consensus"] = consensus
+            # For backward compatibility prior to .column_annotations:
            alignment._star_info = consensus
        return alignment