Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille · f2418d0e
--- a/.travis.yml
+++ b/.travis.yml
 # Check on http://lint.travis-ci.org/ after modifying it!  Originally
 # modified from https://gist.github.com/dan-blanchard/7045057
 sudo: false
-language: python
+language: c
+os:
+  - linux
+  - osx
 env:
-  - PYTHON_VERSION=3.6 MAKE_DOC=TRUE
-  - PYTHON_VERSION=3.5 MAKE_DOC=TRUE
-  - PYTHON_VERSION=3.4 USE_CYTHON=TRUE
+  - CONDA_PY=3.6 MAKE_DOC=TRUE
+  - CONDA_PY=3.5 MAKE_DOC=TRUE USE_CYTHON=TRUE
+
 before_install:
-  - "export DISPLAY=:99.0"
-  - "sh -e /etc/init.d/xvfb start"
-  - wget http://repo.continuum.io/miniconda/Miniconda3-3.7.3-Linux-x86_64.sh -O miniconda.sh
-  - chmod +x miniconda.sh
-  - ./miniconda.sh -b
-  - export PATH=/home/travis/miniconda3/bin:$PATH
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; fi
+  - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh; fi
+  - bash miniconda.sh -b -p $HOME/miniconda
+  - export PATH="$HOME/miniconda/bin:$PATH"
+  - hash -r
  # Update conda itself
  - conda update --yes conda
+
 install:
-  - conda create --yes -n env_name python=$PYTHON_VERSION
+  - conda create --yes -n env_name python=$CONDA_PY
  - conda install --yes -n env_name --file ci/conda_requirements.txt
  - if [ ${USE_CYTHON} ]; then conda install --yes -n env_name cython; fi
  - source activate env_name
@@ -24,11 +27,14 @@ install:
  - python --version
  - pip install -r ci/pip_requirements.txt
  - pip install . --no-deps
+
 script:
  - WITH_COVERAGE=TRUE make test
  - if [ ${MAKE_DOC} ]; then make -C doc clean html; fi
+
 after_success:
  - coveralls
+
 notifications:
  webhooks:
    urls:

--- a/CHANGELOG.md
+++ b/CHANGELOG.md
 # scikit-bio changelog

+## Version 0.5.3 (2018-08-07)
+
+### Features
+
+* Added `unpack` and `unpack_by_func` to `skbio.tree.TreeNode` to unpack one or multiple internal nodes. The "unpack" operation removes an internal node and regrafts its children to its parent while retaining the overall length.
+
+* Added `support` to `skbio.tree.TreeNode` to return the support value of a node.
+
+* Added `permdisp` to `skbio.stats.distance` to test for the homogeneity of groups ([#1228](https://github.com/biocore/scikit-bio/issues/1228)).
+
+* Added `pcoa_biplot` to `skbio.stats.ordination` to project descriptors into a PCoA plot.
+
+* Restricted pandas to versions below 0.23.0 due to this: https://github.com/pandas-dev/pandas/issues/20527
+
+### Backward-incompatible changes [stable]
+
+### Backward-incompatible changes [experimental]
+
+### Performance enhancements
+
+### Bug fixes
+
+* Relaxed type checking in diversity calculations ([#1583](https://github.com/biocore/scikit-bio/issues/1583)).
+
+### Deprecated functionality [stable]
+
+### Deprecated functionality [experimental]
+
+### Miscellaneous
+
+
 ## Version 0.5.2 (2018-04-18)

 ### Features

--- a/README.rst
+++ b/README.rst
@@ -3,7 +3,7 @@
   :target: http://scikit-bio.org
   :alt: scikit-bio logo

-|Build Status| |Coverage Status| |ASV Benchmarks| |Gitter Badge| |Depsy Badge| |Anaconda Cloud Build| |Anaconda Cloud| |License| |Downloads| |Install|
+|Build Status| |Coverage Status| |ASV Benchmarks| |Gitter Badge| |Depsy Badge| |Anaconda Build Platforms| |Anaconda Build Version| |License| |Downloads| |Install|

 scikit-bio is an open-source, BSD-licensed Python 3 package providing data structures, algorithms and educational resources for bioinformatics.

@@ -118,9 +118,9 @@ scikit-bio's logo was created by `Alina Prassas <http://cargocollective.com/alin
   :target: https://gitter.im/biocore/scikit-bio?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge
 .. |Depsy Badge| image:: http://depsy.org/api/package/pypi/scikit-bio/badge.svg
   :target: http://depsy.org/package/python/scikit-bio
-.. |Anaconda Cloud Build| image:: https://anaconda.org/conda-forge/scikit-bio/badges/build.svg
-   :target: https://anaconda.org/biocore/scikit-bio/builds
-.. |Anaconda Cloud| image:: https://anaconda.org/conda-forge/scikit-bio/badges/version.svg
+.. |Anaconda Build Platforms| image:: https://anaconda.org/conda-forge/scikit-bio/badges/platforms.svg
+   :target: https://anaconda.org/conda-forge/scikit-bio
+.. |Anaconda Build Version| image:: https://anaconda.org/conda-forge/scikit-bio/badges/version.svg
   :target: https://anaconda.org/conda-forge/scikit-bio
 .. |License| image:: https://anaconda.org/conda-forge/scikit-bio/badges/license.svg
   :target: https://anaconda.org/conda-forge/scikit-bio

--- a/ci/conda_requirements.txt
+++ b/ci/conda_requirements.txt
@@ -3,7 +3,7 @@ pip
 numpy<1.14.0
 scipy
 matplotlib
-pandas
+pandas<0.23.0
 nose
 pep8
 ipython

--- a/ci/pip_requirements.txt
+++ b/ci/pip_requirements.txt
@@ -7,3 +7,4 @@ Sphinx
 sphinx-bootstrap-theme
 numpydoc < 0.8.0
 check-manifest
+hdmedians
--- a/debian/changelog
+++ b/debian/changelog
+python-skbio (0.5.3-1) UNRELEASED; urgency=medium
+
+  * New upstream version
+  * Standards-Version: 4.2.0
+  TODO: Needs python-hdmedians (to be packaged)
+
+ -- Andreas Tille <tille@debian.org>  Wed, 15 Aug 2018 15:21:02 +0200
+
 python-skbio (0.5.2-1) unstable; urgency=medium

  * New upstream version

--- a/debian/control
+++ b/debian/control
@@ -27,7 +27,7 @@ Build-Depends: debhelper (>= 11~),
               python3-sphinx,
               python3-sphinx-bootstrap-theme,
               libssw-dev
-Standards-Version: 4.1.4
+Standards-Version: 4.2.0
 Vcs-Browser: https://salsa.debian.org/med-team/python-skbio
 Vcs-Git: https://salsa.debian.org/med-team/python-skbio.git
 Homepage: https://github.com/biocore/scikit-bio

--- a/setup.py
+++ b/setup.py
@@ -120,9 +120,10 @@ setup(name='scikit-bio',
          # a numpy 1.14.0 conda package on `defaults` or `conda-forge`
          # channels.
          'numpy >= 1.9.2, < 1.14.0',
-          'pandas >= 0.19.2',
+          'pandas >= 0.19.2, < 0.23.0',
          'scipy >= 0.15.1',
          'nose >= 1.3.7',
+          'hdmedians >= 0.13',
          'scikit-learn >= 0.19.1'
      ],
      test_suite='nose.collector',

--- a/skbio/__init__.py
+++ b/skbio/__init__.py
@@ -26,7 +26,7 @@ __all__ = ['Sequence', 'DNA', 'RNA', 'Protein', 'GeneticCode',
           'TreeNode', 'nj', 'read', 'write', 'OrdinationResults']

 __credits__ = "https://github.com/biocore/scikit-bio/graphs/contributors"
-__version__ = "0.5.2"
+__version__ = "0.5.3"

 mottos = [
    # 03/15/2014

--- a/skbio/alignment/_pairwise.py
+++ b/skbio/alignment/_pairwise.py
@@ -735,7 +735,7 @@ def local_pairwise_align_ssw(sequence1, sequence2, **kwargs):
    return msa, alignment.optimal_alignment_score, start_end


-@deprecated(as_of="0.4.0", until="0.5.3",
+@deprecated(as_of="0.4.0", until="0.5.4",
            reason="Will be replaced by a SubstitutionMatrix class. To track "
                   "progress, see [#161]"
                   "(https://github.com/biocore/scikit-bio/issues/161).")

--- a/skbio/diversity/_driver.py
+++ b/skbio/diversity/_driver.py
@@ -182,7 +182,7 @@ def alpha_diversity(metric, counts, ids=None, validate=True, **kwargs):
    return pd.Series(results, index=ids)


-@deprecated(as_of='0.5.0', until='0.5.3',
+@deprecated(as_of='0.5.0', until='0.6.0',
            reason=('The return type is unstable. Developer caution is '
                    'advised. The resulting DistanceMatrix object will '
                    'include zeros when distance has not been calculated, and '

--- a/skbio/diversity/_util.py
+++ b/skbio/diversity/_util.py
@@ -21,10 +21,8 @@ def _validate_counts_vector(counts, suppress_cast=False):

    """
    counts = np.asarray(counts)
-
-    if not suppress_cast:
-        counts = counts.astype(int, casting='safe', copy=False)
-
+    if not np.all(np.isreal(counts)):
+        raise ValueError("Counts vector must contain real-valued entries.")
    if counts.ndim != 1:
        raise ValueError("Only 1-D vectors are supported.")
    elif (counts < 0).any():

--- a/skbio/diversity/tests/test_util.py
+++ b/skbio/diversity/tests/test_util.py
@@ -57,9 +57,6 @@ class ValidationTests(TestCase):
        self.assertEqual(obs.dtype, int)

    def test_validate_counts_vector_invalid_input(self):
-        # wrong dtype
-        with self.assertRaises(TypeError):
-            _validate_counts_vector([0, 2, 1.2, 3])

        # wrong number of dimensions (2-D)
        with self.assertRaises(ValueError):
@@ -73,6 +70,10 @@ class ValidationTests(TestCase):
        with self.assertRaises(ValueError):
            _validate_counts_vector([0, 0, 2, -1, 3])

+        # strings
+        with self.assertRaises(ValueError):
+            _validate_counts_vector([0, 0, 'a', -1, 3])
+
    def test_validate_counts_matrix(self):
        # basic valid input (n=2)
        obs = _validate_counts_matrix([[0, 1, 1, 0, 2],
@@ -101,8 +102,6 @@ class ValidationTests(TestCase):
        npt.assert_array_equal(obs[1], np.array([42.2, 42.1, 1.0]))
        self.assertEqual(obs[0].dtype, float)
        self.assertEqual(obs[1].dtype, float)
-        with self.assertRaises(TypeError):
-            _validate_counts_matrix([[0.0], [1]], suppress_cast=False)

    def test_validate_counts_matrix_negative_counts(self):
        with self.assertRaises(ValueError):

--- a/skbio/sequence/_grammared_sequence.py
+++ b/skbio/sequence/_grammared_sequence.py
@@ -281,7 +281,7 @@ class GrammaredSequence(Sequence, metaclass=GrammaredSequenceMeta):
        return set(cls.degenerate_map)

    @classproperty
-    @deprecated(as_of='0.5.0', until='0.5.3',
+    @deprecated(as_of='0.5.0', until='0.6.0',
                reason='Renamed to definite_chars')
    def nondegenerate_chars(cls):
        """Return non-degenerate characters.
@@ -492,7 +492,7 @@ class GrammaredSequence(Sequence, metaclass=GrammaredSequenceMeta):
        """
        return np.in1d(self._bytes, self._definite_char_codes)

-    @deprecated(as_of='0.5.0', until='0.5.3',
+    @deprecated(as_of='0.5.0', until='0.6.0',
                reason='Renamed to definites')
    def nondegenerates(self):
        """Find positions containing non-degenerate characters in the sequence.
@@ -548,7 +548,7 @@ class GrammaredSequence(Sequence, metaclass=GrammaredSequenceMeta):
        # TODO: cache results
        return bool(self.definites().any())

-    @deprecated(as_of='0.5.0', until='0.5.3',
+    @deprecated(as_of='0.5.0', until='0.6.0',
                reason='Renamed to has_definites')
    def has_nondegenerates(self):
        """Determine if sequence contains one or more non-degenerate characters

--- a/skbio/stats/distance/__init__.py
+++ b/skbio/stats/distance/__init__.py
@@ -154,6 +154,7 @@ Categorical Variable Stats

   anosim
   permanova
+   permdisp

 Continuous Variable Stats
 ^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -196,9 +197,10 @@ from ._bioenv import bioenv
 from ._anosim import anosim
 from ._permanova import permanova
 from ._mantel import mantel, pwmantel
+from ._permdisp import permdisp

 __all__ = ['DissimilarityMatrixError', 'DistanceMatrixError', 'MissingIDError',
           'DissimilarityMatrix', 'DistanceMatrix', 'randdm', 'anosim',
-           'permanova', 'bioenv', 'mantel', 'pwmantel']
+           'permanova', 'bioenv', 'mantel', 'pwmantel', 'permdisp']

 test = TestRunner(__file__).test
--- a/skbio/stats/distance/_permdisp.py
+++ b/skbio/stats/distance/_permdisp.py
+# ----------------------------------------------------------------------------
+# Copyright (c) 2013--, scikit-bio development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# ----------------------------------------------------------------------------
+
+from functools import partial
+
+import numpy as np
+from scipy.stats import f_oneway
+from scipy.spatial.distance import cdist
+
+import hdmedians as hd
+
+from ._base import (_preprocess_input, _run_monte_carlo_stats, _build_results)
+
+from skbio.stats.ordination import pcoa
+from skbio.util._decorator import experimental
+
+
+# NOTE(review): the CHANGELOG lists ``permdisp`` as new in 0.5.3, while
+# ``as_of`` below says 0.5.2 -- confirm which version is intended.
+@experimental(as_of="0.5.2")
+def permdisp(distance_matrix, grouping, column=None, test='median',
+             permutations=999):
+    """Test for Homogeneity of Multivariate Groups Dispersions using Marti
+    Anderson's PERMDISP2 procedure.
+
+    PERMDISP is a multivariate analogue of Levene's test for homogeneity of
+    multivariate variances. Distances are handled by reducing the
+    original distances to principal coordinates. PERMDISP calculates an
+    F-statistic to assess whether the dispersions between groups are
+    significant.
+
+    Parameters
+    ----------
+    distance_matrix : DistanceMatrix
+        Distance matrix containing distances between objects (e.g., distances
+        between samples of microbial communities).
+    grouping : 1-D array_like or pandas.DataFrame
+        Vector indicating the assignment of objects to groups. For example,
+        these could be strings or integers denoting which group an object
+        belongs to. If `grouping` is 1-D ``array_like``, it must be the same
+        length and in the same order as the objects in `distance_matrix`. If
+        `grouping` is a ``DataFrame``, the column specified by `column` will be
+        used as the grouping vector. The ``DataFrame`` must be indexed by the
+        IDs in `distance_matrix` (i.e., the row labels must be distance matrix
+        IDs), but the order of IDs between `distance_matrix` and the
+        ``DataFrame`` need not be the same. All IDs in the distance matrix must
+        be present in the ``DataFrame``. Extra IDs in the ``DataFrame`` are
+        allowed (they are ignored in the calculations).
+    column : str, optional
+        Column name to use as the grouping vector if `grouping` is a
+        ``DataFrame``. Must be provided if `grouping` is a ``DataFrame``.
+        Cannot be provided if `grouping` is 1-D ``array_like``.
+    test : {'centroid', 'median'}, optional
+        Determines whether the analysis is done using the centroid or the
+        spatial median of each group. Defaults to ``'median'``.
+    permutations : int, optional
+        Number of permutations to use when assessing statistical
+        significance. Must be greater than or equal to zero. If zero,
+        statistical significance calculations will be skipped and the p-value
+        will be ``np.nan``.
+
+    Returns
+    -------
+    pandas.Series
+        Results of the statistical test, including ``test statistic`` and
+        ``p-value``.
+
+    Raises
+    ------
+    TypeError
+        If, when using the spatial median test, the pcoa ordination is not of
+        type np.float32 or np.float64, the spatial median function will fail
+        and the centroid test should be used instead
+    ValueError
+        If the test is not centroid or median.
+    TypeError
+        If the distance matrix is not an instance of a
+        ``skbio.DistanceMatrix``.
+    ValueError
+        If there is only one group
+    ValueError
+        If a list and a column name are both provided
+    ValueError
+        If a list is provided for `grouping` and its length does not match
+        the number of ids in distance_matrix
+    ValueError
+        If all of the values in the grouping vector are unique
+    KeyError
+        If there are ids in grouping that are not in distance_matrix
+
+    See Also
+    --------
+    permanova
+    anosim
+
+    Notes
+    -----
+    The significance of the results from this function will be the same as the
+    results found in vegan's betadisper, however due to floating point
+    variability the F-statistic results may vary slightly.
+
+    See [1]_ for the original method reference, as well as
+    ``vegan::betadisper``, available in R's vegan package [2]_.
+
+    References
+    ----------
+    .. [1] Anderson, Marti J. "Distance-Based Tests for Homogeneity of
+        Multivariate Dispersions." Biometrics 62 (2006):245-253
+
+    .. [2] http://cran.r-project.org/web/packages/vegan/index.html
+
+    Examples
+    --------
+    Load a 6x6 distance matrix and grouping vector denoting 2 groups of
+    objects:
+
+    >>> from skbio import DistanceMatrix
+    >>> dm = DistanceMatrix([[0,    0.5,  0.75, 1, 0.66, 0.33],
+    ...                       [0.5,  0,    0.25, 0.33, 0.77, 0.61],
+    ...                       [0.75, 0.25, 0,    0.1, 0.44, 0.55],
+    ...                       [1,    0.33, 0.1,  0, 0.75, 0.88],
+    ...                       [0.66, 0.77, 0.44, 0.75, 0, 0.77],
+    ...                       [0.33, 0.61, 0.55, 0.88, 0.77, 0]],
+    ...                       ['s1', 's2', 's3', 's4', 's5', 's6'])
+    >>> grouping = ['G1', 'G1', 'G1', 'G2', 'G2', 'G2']
+
+    Run PERMDISP using 99 permutations to calculate the p-value:
+
+    >>> from skbio.stats.distance import permdisp
+    >>> import numpy as np
+    >>> #make output deterministic, should not be included during normal use
+    >>> np.random.seed(0)
+    >>> permdisp(dm, grouping, permutations=99)
+    method name               PERMDISP
+    test statistic name        F-value
+    sample size                      6
+    number of groups                 2
+    test statistic             1.03296
+    p-value                       0.35
+    number of permutations          99
+    Name: PERMDISP results, dtype: object
+
+    The return value is a ``pandas.Series`` object containing the results of
+    the statistical test.
+
+    To suppress calculation of the p-value and only obtain the F statistic,
+    specify zero permutations:
+
+    >>> permdisp(dm, grouping, permutations=0)
+    method name               PERMDISP
+    test statistic name        F-value
+    sample size                      6
+    number of groups                 2
+    test statistic             1.03296
+    p-value                        NaN
+    number of permutations           0
+    Name: PERMDISP results, dtype: object
+
+    PERMDISP computes variances based on two types of tests, using either
+    centroids or spatial medians, also commonly referred to as a geometric
+    median. The spatial median is thought to yield a more robust test
+    statistic, and this test is used by default. Spatial medians are computed
+    using an iterative algorithm to find the optimally minimum point from all
+    other points in a group while centroids are computed using a deterministic
+    formula. As such the two different tests yield slightly different F
+    statistics.
+
+    >>> np.random.seed(0)
+    >>> permdisp(dm, grouping, test='centroid', permutations=99)
+    method name               PERMDISP
+    test statistic name        F-value
+    sample size                      6
+    number of groups                 2
+    test statistic             3.67082
+    p-value                       0.29
+    number of permutations          99
+    Name: PERMDISP results, dtype: object
+
+    You can also provide a ``pandas.DataFrame`` and a column denoting the
+    grouping instead of a grouping vector. The following DataFrame's
+    Grouping column specifies the same grouping as the vector we used in the
+    previous examples:
+
+    >>> import pandas as pd
+    >>> df = pd.DataFrame.from_dict(
+    ...      {'Grouping': {'s1': 'G1', 's2': 'G1', 's3': 'G1', 's4': 'G2',
+    ...                    's5': 'G2', 's6': 'G2'}})
+    >>> # make output deterministic; should not be included during normal use
+    >>> np.random.seed(0)
+    >>> permdisp(dm, df, 'Grouping', permutations=99, test='centroid')
+    method name               PERMDISP
+    test statistic name        F-value
+    sample size                      6
+    number of groups                 2
+    test statistic             3.67082
+    p-value                       0.29
+    number of permutations          99
+    Name: PERMDISP results, dtype: object
+
+    Note that when providing a ``DataFrame``, the ordering of rows and/or
+    columns does not affect the grouping vector that is extracted. The
+    ``DataFrame`` must be indexed by the distance matrix IDs (i.e., the row
+    labels must be distance matrix IDs).
+
+    If IDs (rows) are present in the ``DataFrame`` but not in the distance
+    matrix, they are ignored, so the ``DataFrame`` can be a superset of the
+    distance matrix IDs. Note that the reverse is not true: IDs in the
+    distance matrix *must* be present in the ``DataFrame`` or an error will be
+    raised.
+
+    PERMDISP should be used to determine whether the dispersions between the
+    groups in your distance matrix are significantly separated.
+    A non-significant test result indicates that group dispersions are similar
+    to each other. PERMANOVA or ANOSIM should then be used in conjunction to
+    determine whether clustering within groups is significant.
+
+    """
+    if test not in ['centroid', 'median']:
+        raise ValueError('Test must be centroid or median')
+
+    # Reduce the distances to principal coordinates; the per-sample
+    # coordinates are what the group dispersions are computed from.
+    ordination = pcoa(distance_matrix)
+    samples = ordination.samples
+
+    # Only sample_size, num_groups and the processed grouping are used below;
+    # tri_idxs and distances are unpacked but not needed here.
+    sample_size, num_groups, grouping, tri_idxs, distances = _preprocess_input(
+        distance_matrix, grouping, column)
+
+    # Bind the coordinates and test type so the resulting callable takes only
+    # a grouping vector.
+    test_stat_function = partial(_compute_groups, samples, test)
+
+    stat, p_value = _run_monte_carlo_stats(test_stat_function, grouping,
+                                           permutations)
+
+    return _build_results('PERMDISP', 'F-value', sample_size, num_groups,
+                          stat, p_value, permutations)
+
+
+def _compute_groups(samples, test_type, grouping):
+    """Compute the F statistic of sample-to-group-center distances.
+
+    Each sample's Euclidean distance to the center of its own group is
+    computed, and the per-group distance arrays are compared with
+    ``scipy.stats.f_oneway``.
+
+    Parameters
+    ----------
+    samples : pandas.DataFrame
+        Sample coordinates, one row per sample. A ``'grouping'`` column is
+        written into this frame in place, so the caller's object is modified.
+    test_type : {'centroid', 'median'}
+        ``'centroid'`` uses the per-group mean as the center; ``'median'``
+        uses the value returned by ``_config_med`` for each group.
+    grouping : 1-D array_like
+        Group label for each row of `samples`.
+
+    Returns
+    -------
+    The first element of the statistic returned by ``f_oneway``.
+
+    """
+
+    groups = []
+
+    # NOTE: mutates the caller's frame; repeated calls overwrite this column.
+    samples['grouping'] = grouping
+    if test_type == 'centroid':
+        centroids = samples.groupby('grouping').aggregate('mean')
+    elif test_type == 'median':
+        centroids = samples.groupby('grouping').aggregate(_config_med)
+
+    for label, df in samples.groupby('grouping'):
+        # [:, :-1] drops the last column, which is assumed to be the
+        # 'grouping' column added above -- TODO confirm it is always last.
+        groups.append(cdist(df.values[:, :-1], [centroids.loc[label].values],
+                            metric='euclidean'))
+
+    stat, _ = f_oneway(*groups)
+    # Each group is an (n, 1) distance array, so the statistic comes back as
+    # a length-1 array; unwrap it to a scalar.
+    stat = stat[0]
+
+    return stat
+
+
+def _config_med(x):
+    """
+    Compute the geometric median of a group of samples.
+
+    Drops the last column of ``x`` (assumed to be the grouping column added
+    by ``_compute_groups`` -- TODO confirm) and passes the transposed values
+    to ``hd.geomedian``, returning the result as a NumPy array.
+    """
+    X = x.values[:, :-1]
+    return np.array(hd.geomedian(X.T))
--- a/skbio/stats/distance/tests/data/moving_pictures_dm.tsv
+++ b/skbio/stats/distance/tests/data/moving_pictures_dm.tsv
--- a/skbio/stats/distance/tests/data/moving_pictures_mf.tsv
+++ b/skbio/stats/distance/tests/data/moving_pictures_mf.tsv
+#SampleID	BarcodeSequence	LinkerPrimerSequence	BodySite	Year	Month	Day	Subject	ReportedAntibioticUsage	DaysSinceExperimentStart	Description
+L1S8	AGCTGACTAGTC	GTGCCAGCMGCCGCGGTAA	gut	2008	10	28	subject-1	Yes	0	subject-1.gut.2008-10-28
+L1S57	ACACACTATGGC	GTGCCAGCMGCCGCGGTAA	gut	2009	1	20	subject-1	No	84	subject-1.gut.2009-1-20
+L1S76	ACTACGTGTGGT	GTGCCAGCMGCCGCGGTAA	gut	2009	2	17	subject-1	No	112	subject-1.gut.2009-2-17
+L1S105	AGTGCGATGCGT	GTGCCAGCMGCCGCGGTAA	gut	2009	3	17	subject-1	No	140	subject-1.gut.2009-3-17
+L2S155	ACGATGCGACCA	GTGCCAGCMGCCGCGGTAA	left palm	2009	1	20	subject-1	No	84	subject-1.left-palm.2009-1-20
+L2S175	AGCTATCCACGA	GTGCCAGCMGCCGCGGTAA	left palm	2009	2	17	subject-1	No	112	subject-1.left-palm.2009-2-17
+L2S204	ATGCAGCTCAGT	GTGCCAGCMGCCGCGGTAA	left palm	2009	3	17	subject-1	No	140	subject-1.left-palm.2009-3-17
+L2S222	CACGTGACATGT	GTGCCAGCMGCCGCGGTAA	left palm	2009	4	14	subject-1	No	168	subject-1.left-palm.2009-4-14
+L3S242	ACAGTTGCGCGA	GTGCCAGCMGCCGCGGTAA	right palm	2008	10	28	subject-1	Yes	0	subject-1.right-palm.2008-10-28
+L3S294	CACGACAGGCTA	GTGCCAGCMGCCGCGGTAA	right palm	2009	1	20	subject-1	No	84	subject-1.right-palm.2009-1-20
+L3S313	AGTGTCACGGTG	GTGCCAGCMGCCGCGGTAA	right palm	2009	2	17	subject-1	No	112	subject-1.right-palm.2009-2-17
+L3S341	CAAGTGAGAGAG	GTGCCAGCMGCCGCGGTAA	right palm	2009	3	17	subject-1	No	140	subject-1.right-palm.2009-3-17
+L3S360	CATCGTATCAAC	GTGCCAGCMGCCGCGGTAA	right palm	2009	4	14	subject-1	No	168	subject-1.right-palm.2009-4-14
+L5S104	CAGTGTCAGGAC	GTGCCAGCMGCCGCGGTAA	tongue	2008	10	28	subject-1	Yes	0	subject-1.tongue.2008-10-28
+L5S155	ATCTTAGACTGC	GTGCCAGCMGCCGCGGTAA	tongue	2009	1	20	subject-1	No	84	subject-1.tongue.2009-1-20
+L5S174	CAGACATTGCGT	GTGCCAGCMGCCGCGGTAA	tongue	2009	2	17	subject-1	No	112	subject-1.tongue.2009-2-17
+L5S203	CGATGCACCAGA	GTGCCAGCMGCCGCGGTAA	tongue	2009	3	17	subject-1	No	140	subject-1.tongue.2009-3-17
+L5S222	CTAGAGACTCTT	GTGCCAGCMGCCGCGGTAA	tongue	2009	4	14	subject-1	No	168	subject-1.tongue.2009-4-14
+L1S140	ATGGCAGCTCTA	GTGCCAGCMGCCGCGGTAA	gut	2008	10	28	subject-2	Yes	0	subject-2.gut.2008-10-28
+L1S208	CTGAGATACGCG	GTGCCAGCMGCCGCGGTAA	gut	2009	1	20	subject-2	No	84	subject-2.gut.2009-1-20
+L1S257	CCGACTGAGATG	GTGCCAGCMGCCGCGGTAA	gut	2009	3	17	subject-2	No	140	subject-2.gut.2009-3-17
+L1S281	CCTCTCGTGATC	GTGCCAGCMGCCGCGGTAA	gut	2009	4	14	subject-2	No	168	subject-2.gut.2009-4-14
+L2S240	CATATCGCAGTT	GTGCCAGCMGCCGCGGTAA	left palm	2008	10	28	subject-2	Yes	0	subject-2.left-palm.2008-10-28
+L2S309	CGTGCATTATCA	GTGCCAGCMGCCGCGGTAA	left palm	2009	1	20	subject-2	No	84	subject-2.left-palm.2009-1-20
+L2S357	CTAACGCAGTCA	GTGCCAGCMGCCGCGGTAA	left palm	2009	3	17	subject-2	No	140	subject-2.left-palm.2009-3-17
+L2S382	CTCAATGACTCA	GTGCCAGCMGCCGCGGTAA	left palm	2009	4	14	subject-2	No	168	subject-2.left-palm.2009-4-14
+L3S378	ATCGATCTGTGG	GTGCCAGCMGCCGCGGTAA	right palm	2008	10	28	subject-2	Yes	0	subject-2.right-palm.2008-10-28
+L4S63	CTCGTGGAGTAG	GTGCCAGCMGCCGCGGTAA	right palm	2009	1	20	subject-2	No	84	subject-2.right-palm.2009-1-20
+L4S112	GCGTTACACACA	GTGCCAGCMGCCGCGGTAA	right palm	2009	3	17	subject-2	No	140	subject-2.right-palm.2009-3-17
+L4S137	GAACTGTATCTC	GTGCCAGCMGCCGCGGTAA	right palm	2009	4	14	subject-2	No	168	subject-2.right-palm.2009-4-14
+L5S240	CTGGACTCATAG	GTGCCAGCMGCCGCGGTAA	tongue	2008	10	28	subject-2	Yes	0	subject-2.tongue.2008-10-28
+L6S20	GAGGCTCATCAT	GTGCCAGCMGCCGCGGTAA	tongue	2009	1	20	subject-2	No	84	subject-2.tongue.2009-1-20
+L6S68	GATACGTCCTGA	GTGCCAGCMGCCGCGGTAA	tongue	2009	3	17	subject-2	No	140	subject-2.tongue.2009-3-17
+L6S93	GATTAGCACTCT	GTGCCAGCMGCCGCGGTAA	tongue	2009	4	14	subject-2	No	168	subject-2.tongue.2009-4-14
\ No newline at end of file
--- a/skbio/stats/distance/tests/test_permdisp.py
+++ b/skbio/stats/distance/tests/test_permdisp.py
+# ----------------------------------------------------------------------------
+# Copyright (c) 2013--, scikit-bio development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# ----------------------------------------------------------------------------
+
+from functools import partial
+from unittest import TestCase, main
+
+import numpy as np
+import numpy.testing as npt
+import pandas as pd
+from pandas.util.testing import assert_series_equal
+from scipy.stats import f_oneway
+import hdmedians as hd
+
+from skbio import DistanceMatrix
+from skbio.stats.ordination import pcoa
+from skbio.stats.distance import permdisp
+from skbio.stats.distance._permdisp import _compute_groups
+from skbio.util import get_data_path
+
+
+class testPERMDISP(TestCase):
+
+    def setUp(self):
+        # test with 2 groups of equal size
+        # when assigned different labels, results should be the same
+        self.grouping_eq = ['foo', 'foo', 'foo', 'bar', 'bar', 'bar']
+        self.grouping_eq_relab = ['pyt', 'pyt', 'pyt', 'hon', 'hon', 'hon']
+        self.exp_index = ['method name', 'test statistic name', 'sample size',
+                          'number of groups', 'test statistic', 'p-value',
+                          'number of permutations']
+        # test with 3 groups of different sizes
+        # when assigned different labels results should be the same
+        self.grouping_uneq = ['foo', 'foo', 'bar', 'bar', 'bar',
+                              'qw', 'qw', 'qw', 'qw']
+
+        self.grouping_uneq_relab = [12, 12, 7, 7, 7, 23, 23, 23, 23]
+
+        self.grouping_un_mixed = ['a', 'a', 7, 7, 7, 'b', 'b', 'b', 'b']
+
+        eq_ids = ['s1', 's2', 's3', 's4', 's5', 's6']
+        uneq_ids = ['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9']
+        # matrix for equal grouping
+        self.eq_mat = DistanceMatrix([[0, 4, 0, 0, 4, 2],
+                                      [4, 0, 2, 0, 3, 1],
+                                      [0, 2, 0, 5, 2, 5],
+                                      [0, 0, 5, 0, 0, 2],
+                                      [4, 3, 2, 0, 0, 2],
+                                      [2, 1, 5, 2, 2, 0]], eq_ids)
+
+        # matrix for unequal grouping
+        self.uneq_mat = DistanceMatrix([[0, 0, 4, 0, 0, 3, 5, 3, 0],
+                                        [0, 0, 0, 3, 4, 5, 3, 0, 3],
+                                        [4, 0, 0, 4, 3, 1, 0, 5, 2],
+                                        [0, 3, 4, 0, 0, 2, 1, 3, 5],
+                                        [0, 4, 3, 0, 0, 1, 1, 5, 0],
+                                        [3, 5, 1, 2, 1, 0, 2, 0, 5],
+                                        [5, 3, 0, 1, 1, 2, 0, 4, 3],
+                                        [3, 0, 5, 3, 5, 0, 4, 0, 4],
+                                        [0, 3, 2, 5, 0, 5, 3, 4, 0]], uneq_ids)
+
+        # null matrix for equal grouping
+        self.null_mat = DistanceMatrix([[0, 0, 0, 0, 0, 0],
+                                        [0, 0, 0, 0, 0, 0],
+                                        [0, 0, 0, 0, 0, 0],
+                                        [0, 0, 0, 0, 0, 0],
+                                        [0, 0, 0, 0, 0, 0],
+                                        [0, 0, 0, 0, 0, 0]], eq_ids)
+
+        unif_ids = ['PC.354', 'PC.355', 'PC.356', 'PC.481', 'PC.593', 'PC.607',
+                    'PC.634', 'PC.635', 'PC.636']
+
+        self.unifrac_dm = DistanceMatrix(
+            [[0.0, 0.595483768391, 0.618074717633, 0.582763100909,
+              0.566949022108, 0.714717232268, 0.772001731764, 0.690237118413,
+              0.740681707488],
+             [0.595483768391, 0.0, 0.581427669668, 0.613726772383,
+              0.65945132763, 0.745176523638, 0.733836123821, 0.720305073505,
+              0.680785600439],
+             [0.618074717633, 0.581427669668, 0.0, 0.672149021573,
+              0.699416863323, 0.71405573754, 0.759178215168, 0.689701276341,
+              0.725100672826],
+             [0.582763100909, 0.613726772383, 0.672149021573, 0.0,
+              0.64756120797, 0.666018240373, 0.66532968784, 0.650464714994,
+              0.632524644216],
+             [0.566949022108, 0.65945132763, 0.699416863323, 0.64756120797,
+              0.0, 0.703720200713, 0.748240937349, 0.73416971958,
+              0.727154987937],
+             [0.714717232268, 0.745176523638, 0.71405573754, 0.666018240373,
+              0.703720200713, 0.0, 0.707316869557, 0.636288883818,
+              0.699880573956],
+             [0.772001731764, 0.733836123821, 0.759178215168, 0.66532968784,
+              0.748240937349, 0.707316869557, 0.0, 0.565875193399,
+              0.560605525642],
+             [0.690237118413, 0.720305073505, 0.689701276341, 0.650464714994,
+              0.73416971958, 0.636288883818, 0.565875193399, 0.0,
+              0.575788039321],
+             [0.740681707488, 0.680785600439, 0.725100672826, 0.632524644216,
+              0.727154987937, 0.699880573956, 0.560605525642, 0.575788039321,
+              0.0]], unif_ids)
+
+        self.unif_grouping = ['Control', 'Control', 'Control', 'Control',
+                              'Control', 'Fast', 'Fast', 'Fast', 'Fast']
+
+        self.assert_series_equal = partial(assert_series_equal,
+                                           check_index_type=True,
+                                           check_series_type=True)
+
+    def test_centroids_eq_groups(self):
+        """'centroid' statistic equals f_oneway over the expected groups."""
+        expected_f = f_oneway(
+            [1.2886811963240687, 1.890538910062923, 1.490527658097728],
+            [2.17349240061718, 2.3192679626679946, 2.028338553903792])[0]
+        coords = pcoa(self.eq_mat).samples
+
+        observed = _compute_groups(coords, 'centroid', self.grouping_eq)
+        self.assertAlmostEqual(observed, expected_f, places=6)
+
+        # The *_relab grouping must yield the very same statistic.
+        relab = _compute_groups(coords, 'centroid', self.grouping_eq_relab)
+        self.assertAlmostEqual(relab, observed, places=6)
+
+    def test_centroids_uneq_groups(self):
+        """'centroid' statistic for groups of unequal size.
+
+        The expected per-group values were calculated by hand.
+        """
+        by_group = [
+            [2.5847022428144935, 2.285624595858895, 1.7022431146340287],
+            [1.724817266046108, 1.724817266046108],
+            [2.4333280644972795, 2.389000390879655,
+             2.8547180589306036, 3.218568759338847],
+        ]
+        expected_f = f_oneway(*by_group)[0]
+        coords = pcoa(self.uneq_mat).samples
+
+        observed = _compute_groups(coords, 'centroid', self.grouping_uneq)
+        self.assertAlmostEqual(observed, expected_f, places=6)
+        # The *_relab grouping must yield the very same statistic.
+        relab = _compute_groups(coords, 'centroid', self.grouping_uneq_relab)
+        self.assertAlmostEqual(observed, relab, places=6)
+
+    def test_centroids_mixedgroups(self):
+        """'centroid' statistic with the ``grouping_un_mixed`` labels."""
+        by_group = [
+            [2.5847022428144935, 2.285624595858895, 1.7022431146340287],
+            [1.724817266046108, 1.724817266046108],
+            [2.4333280644972795, 2.389000390879655,
+             2.8547180589306036, 3.218568759338847],
+        ]
+        expected_f = f_oneway(*by_group)[0]
+
+        coords = pcoa(self.uneq_mat).samples
+        observed = _compute_groups(coords, 'centroid', self.grouping_un_mixed)
+        self.assertAlmostEqual(expected_f, observed, places=6)
+
+    def test_centroids_null(self):
+        """The 'centroid' statistic for ``null_mat`` must be NaN."""
+        coords = pcoa(self.null_mat).samples
+        obs_null = _compute_groups(coords, 'centroid', self.grouping_eq)
+        # The bare np.isnan() call discarded its result; assert it instead.
+        self.assertTrue(np.isnan(obs_null))
+
+    def test_centroid_normal(self):
+        """permdisp(test='centroid') on the UniFrac matrix, seeded run."""
+        labels = ['Control'] * 5 + ['Fast'] * 4
+        expected = pd.Series(
+            data=['PERMDISP', 'F-value', 9, 2, 0.244501519876, 0.63, 99],
+            index=self.exp_index,
+            name='PERMDISP results')
+
+        # Seed the global NumPy RNG right before the call; presumably this
+        # is what pins the permutation p-value expected above.
+        np.random.seed(0)
+        observed = permdisp(self.unifrac_dm, labels, permutations=99,
+                            test='centroid')
+        self.assert_series_equal(observed, expected)
+
+    def test_median_normal(self):
+        """permdisp(test='median') on the UniFrac matrix, seeded run."""
+        expected = pd.Series(
+            data=['PERMDISP', 'F-value', 9, 2, 0.139475441876, 0.61, 99],
+            index=self.exp_index,
+            name='PERMDISP results')
+
+        # Seeded just before the call; presumably this pins the p-value.
+        np.random.seed(0)
+        observed = permdisp(self.unifrac_dm, self.unif_grouping,
+                            permutations=99, test='median')
+        self.assert_series_equal(observed, expected)
+
+    def test_not_distance_matrix(self):
+        """An empty list passed as the distance matrix raises TypeError."""
+        labels = ['Control'] * 5 + ['Fast'] * 4
+
+        with npt.assert_raises(TypeError):
+            permdisp([], labels, permutations=0)
+
+    def test_mismatched_group(self):
+        """Two labels for the nine-sample matrix raise ValueError."""
+        with npt.assert_raises(ValueError):
+            permdisp(self.unifrac_dm, ['foo', 'bar'])
+
+    def test_single_group(self):
+        """A grouping where all nine labels are equal raises ValueError."""
+        with npt.assert_raises(ValueError):
+            permdisp(self.unifrac_dm, ['f'] * 9)
+
+    def test_no_permuations(self):
+        """With permutations=0 the reported p-value must be NaN."""
+        obs = permdisp(self.eq_mat, self.grouping_eq, permutations=0)
+        # np.isnan() alone checks nothing; wrap it in an assertion.
+        self.assertTrue(np.isnan(obs['p-value']))
+
+    def test_hdmedians(self):  # hd.geomedian(eq_mat data) vs. stored values
+        observed = np.array(hd.geomedian(self.eq_mat.data))
+        reference = np.array([2.01956244, 1.53164546, 2.60571752,
+                              0.91424179, 1.76214416, 1.69943057])
+        npt.assert_almost_equal(observed, reference, decimal=6)
+
+    def test_confirm_betadispr_results(self):
+        """permdisp on the moving-pictures files reproduces stored values.
+
+        Covers the default test and test='centroid' with column='BodySite'.
+        """
+        dm = DistanceMatrix.read(get_data_path('moving_pictures_dm.tsv'))
+        metadata = pd.read_csv(get_data_path('moving_pictures_mf.tsv'),
+                               sep='\t').set_index('#SampleID')
+
+        # Default test first, then 'centroid' (call order is kept as-is).
+        obs_default = permdisp(dm, metadata, column='BodySite')
+        obs_centroid = permdisp(dm, metadata, column='BodySite',
+                                test='centroid')
+
+        labels = ['method name', 'test statistic name', 'sample size',
+                  'number of groups', 'test statistic', 'p-value',
+                  'number of permutations']
+
+        def expected(f_value):
+            return pd.Series(
+                data=['PERMDISP', 'F-value', 33, 4, f_value, 0.001, 999],
+                index=labels, dtype='object', name='PERMDISP results')
+
+        self.assert_series_equal(expected(10.1956), obs_default)
+        self.assert_series_equal(expected(17.4242), obs_centroid)
+
+
+if __name__ == '__main__':  # only when executed directly as a script
+    main()
--- a/skbio/stats/ordination/__init__.py
+++ b/skbio/stats/ordination/__init__.py
@@ -17,6 +17,7 @@ Functions

   ca
   pcoa
+   pcoa_biplot
   cca
   rda
   mean_and_std
@@ -126,11 +127,11 @@ from skbio.util import TestRunner
 from ._redundancy_analysis import rda
 from ._correspondence_analysis import ca
 from ._canonical_correspondence_analysis import cca
-from ._principal_coordinate_analysis import pcoa
+from ._principal_coordinate_analysis import pcoa, pcoa_biplot
 from ._ordination_results import OrdinationResults
 from ._utils import (mean_and_std, scale, svd_rank, corr, e_matrix, f_matrix)

-__all__ = ['ca', 'rda', 'cca', 'pcoa', 'OrdinationResults',
+__all__ = ['ca', 'rda', 'cca', 'pcoa', 'pcoa_biplot', 'OrdinationResults',
           'mean_and_std', 'scale', 'svd_rank', 'corr',
           'e_matrix', 'f_matrix']