Skip to content
Commits on Source (3)
# scikit-bio changelog
## Version 0.5.4 (2018-08-23)
### Features
* Added `FSVD`, an alternative fast heuristic method to perform Principal Coordinates Analysis, to `skbio.stats.ordination.pcoa`.
### Backward-incompatible changes [stable]
### Backward-incompatible changes [experimental]
### Performance enhancements
* Added optimized utility methods `f_matrix_inplace` and `e_matrix_inplace` which perform `f_matrix` and `e_matrix` computations in-place and are used by the new `center_distance_matrix` method in `skbio.stats.ordination`.
### Bug fixes
### Deprecated functionality [stable]
### Deprecated functionality [experimental]
### Miscellaneous
## Version 0.5.3 (2018-08-07)
### Features
......
python-skbio (0.5.3-1) UNRELEASED; urgency=medium
python-skbio (0.5.4-1) UNRELEASED; urgency=medium
* New upstream version
* Standards-Version: 4.2.0
* Build-Depends: python3-hdmedians
-- Andreas Tille <tille@debian.org> Wed, 15 Aug 2018 15:21:02 +0200
-- Andreas Tille <tille@debian.org> Tue, 28 Aug 2018 18:14:05 +0200
python-skbio (0.5.2-1) unstable; urgency=medium
......
......@@ -26,7 +26,7 @@ __all__ = ['Sequence', 'DNA', 'RNA', 'Protein', 'GeneticCode',
'TreeNode', 'nj', 'read', 'write', 'OrdinationResults']
__credits__ = "https://github.com/biocore/scikit-bio/graphs/contributors"
__version__ = "0.5.3"
__version__ = "0.5.4"
mottos = [
# 03/15/2014
......
......@@ -129,10 +129,11 @@ from ._correspondence_analysis import ca
from ._canonical_correspondence_analysis import cca
from ._principal_coordinate_analysis import pcoa, pcoa_biplot
from ._ordination_results import OrdinationResults
from ._utils import (mean_and_std, scale, svd_rank, corr, e_matrix, f_matrix)
from ._utils import (mean_and_std, scale, svd_rank, corr, e_matrix, f_matrix,
center_distance_matrix)
__all__ = ['ca', 'rda', 'cca', 'pcoa', 'pcoa_biplot', 'OrdinationResults',
'mean_and_std', 'scale', 'svd_rank', 'corr',
'e_matrix', 'f_matrix']
'e_matrix', 'f_matrix', 'center_distance_matrix']
test = TestRunner(__file__).test
......@@ -6,33 +6,29 @@
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------
from warnings import warn
import pandas as pd
import numpy as np
import pandas as pd
from numpy import dot, hstack
from numpy.linalg import qr, svd
from numpy.random import standard_normal
from scipy.linalg import eigh
from warnings import warn
from skbio.stats.distance import DistanceMatrix
from skbio.util._decorator import experimental
from ._ordination_results import OrdinationResults
from ._utils import e_matrix, f_matrix, scale
# - In cogent, after computing eigenvalues/vectors, the imaginary part
# is dropped, if any. We know for a fact that the eigenvalues are
# real, so that's not necessary, but eigenvectors can in principle
# be complex (see for example
# http://math.stackexchange.com/a/47807/109129 for details) and in
# that case dropping the imaginary part means they'd no longer be
# so, so I'm not doing that.
from ._utils import center_distance_matrix, scale
@experimental(as_of="0.4.0")
def pcoa(distance_matrix):
def pcoa(distance_matrix, method="eigh", number_of_dimensions=0,
inplace=False):
r"""Perform Principal Coordinate Analysis.
Principal Coordinate Analysis (PCoA) is a method similar to PCA
that works from distance matrices, and so it can be used with
ecologically meaningful distances like UniFrac for bacteria.
Principal Coordinate Analysis (PCoA) is a method similar
to Principal Components Analysis (PCA) with the difference that PCoA
operates on distance matrices, typically with non-euclidian and thus
ecologically meaningful distances like UniFrac in microbiome research.
In ecology, the euclidean distance preserved by Principal
Component Analysis (PCA) is often not a good choice because it
......@@ -44,10 +40,36 @@ def pcoa(distance_matrix):
species is present in two sites, that means that the sites are
similar.).
Note that the returned eigenvectors are not normalized to unit length.
Parameters
----------
distance_matrix : DistanceMatrix
A distance matrix.
method : str, optional
Eigendecomposition method to use in performing PCoA.
By default, uses SciPy's `eigh`, which computes exact
eigenvectors and eigenvalues for all dimensions. The alternate
method, `fsvd`, uses faster heuristic eigendecomposition but loses
accuracy. The magnitude of accuracy lost is dependent on dataset.
number_of_dimensions : int, optional
Dimensions to reduce the distance matrix to. This number determines
how many eigenvectors and eigenvalues will be returned.
By default, equal to the number of dimensions of the distance matrix,
as default eigendecomposition using SciPy's `eigh` method computes
all eigenvectors and eigenvalues. If using fast heuristic
eigendecomposition through `fsvd`, a desired number of dimensions
should be specified. Note that the default eigendecomposition
method `eigh` does not natively support a specifying number of
dimensions to reduce a matrix to, so if this parameter is specified,
all eigenvectors and eigenvalues will be simply be computed with no
speed gain, and only the number specified by `number_of_dimensions`
will be returned. Specifying a value of `0`, the default, will
set `number_of_dimensions` equal to the number of dimensions of the
specified `distance_matrix`.
inplace : bool, optional
If true, centers a distance matrix in-place in a manner that reduces
memory consumption.
Returns
-------
......@@ -62,37 +84,56 @@ def pcoa(distance_matrix):
Notes
-----
It is sometimes known as metric multidimensional scaling or
classical scaling.
.. note::
If the distance is not euclidean (for example if it is a
.. note:: If the distance is not euclidean (for example if it is a
semimetric and the triangle inequality doesn't hold),
negative eigenvalues can appear. There are different ways
to deal with that problem (see Legendre & Legendre 1998, \S
9.2.3), but none are currently implemented here.
However, a warning is raised whenever negative eigenvalues
appear, allowing the user to decide if they can be safely
ignored.
"""
distance_matrix = DistanceMatrix(distance_matrix)
E_matrix = e_matrix(distance_matrix.data)
# If the used distance was euclidean, pairwise distances
# needn't be computed from the data table Y because F_matrix =
# Y.dot(Y.T) (if Y has been centred).
F_matrix = f_matrix(E_matrix)
# If the eigendecomposition ever became a bottleneck, it could
# be replaced with an iterative version that computes the
# largest k eigenvectors.
eigvals, eigvecs = eigh(F_matrix)
# eigvals might not be ordered, so we order them (at least one
# is zero). cogent makes eigenvalues positive by taking the
# Center distance matrix, a requirement for PCoA here
matrix_data = center_distance_matrix(distance_matrix.data, inplace=inplace)
# If no dimension specified, by default will compute all eigenvectors
# and eigenvalues
if number_of_dimensions == 0:
if method == "fsvd" and matrix_data.shape[0] > 10:
warn("FSVD: since no value for number_of_dimensions is specified, "
"PCoA for all dimensions will be computed, which may "
"result in long computation time if the original "
"distance matrix is large.", RuntimeWarning)
# distance_matrix is guaranteed to be square
number_of_dimensions = matrix_data.shape[0]
elif number_of_dimensions < 0:
raise ValueError('Invalid operation: cannot reduce distance matrix '
'to negative dimensions using PCoA. Did you intend '
'to specify the default value "0", which sets '
'the number_of_dimensions equal to the '
'dimensionality of the given distance matrix?')
# Perform eigendecomposition
if method == "eigh":
# eigh does not natively support specifying number_of_dimensions, i.e.
# there are no speed gains unlike in FSVD. Later, we slice off unwanted
# dimensions to conform the result of eigh to the specified
# number_of_dimensions.
eigvals, eigvecs = eigh(matrix_data)
long_method_name = "Principal Coordinate Analysis"
elif method == "fsvd":
eigvals, eigvecs = _fsvd(matrix_data, number_of_dimensions)
long_method_name = "Approximate Principal Coordinate Analysis " \
"using FSVD"
else:
raise ValueError(
"PCoA eigendecomposition method {} not supported.".format(method))
# cogent makes eigenvalues positive by taking the
# abs value, but that doesn't seem to be an approach accepted
# by L&L to deal with negative eigenvalues. We raise a warning
# in that case. First, we make values close to 0 equal to 0.
......@@ -110,17 +151,13 @@ def pcoa(distance_matrix):
eigvals.max()),
RuntimeWarning
)
# eigvals might not be ordered, so we first sort them, then analogously
# sort the eigenvectors by the ordering of the eigenvalues too
idxs_descending = eigvals.argsort()[::-1]
eigvals = eigvals[idxs_descending]
eigvecs = eigvecs[:, idxs_descending]
# Scale eigenvalues to have lenght = sqrt(eigenvalue). This
# works because np.linalg.eigh returns normalized
# eigenvectors. Each row contains the coordinates of the
# objects in the space of principal coordinates. Note that at
# least one eigenvalue is zero because only n-1 axes are
# needed to represent n points in an euclidean space.
# If we return only the coordinates that make sense (i.e., that have a
# corresponding positive eigenvalue), then Jackknifed Beta Diversity
# won't work as it expects all the OrdinationResults to have the same
......@@ -130,13 +167,46 @@ def pcoa(distance_matrix):
eigvecs[:, num_positive:] = np.zeros(eigvecs[:, num_positive:].shape)
eigvals[num_positive:] = np.zeros(eigvals[num_positive:].shape)
if method == "fsvd":
# Since the dimension parameter, hereafter referred to as 'd',
# restricts the number of eigenvalues and eigenvectors that FSVD
# computes, we need to use an alternative method to compute the sum
# of all eigenvalues, used to compute the array of proportions
# explained. Otherwise, the proportions calculated will only be
# relative to d number of dimensions computed; whereas we want
# it to be relative to the entire dimensionality of the
# centered distance matrix.
# An alternative method of calculating th sum of eigenvalues is by
# computing the trace of the centered distance matrix.
# See proof outlined here: https://goo.gl/VAYiXx
sum_eigenvalues = np.trace(matrix_data)
else:
# Calculate proportions the usual way
sum_eigenvalues = np.sum(eigvals)
proportion_explained = eigvals / sum_eigenvalues
# In case eigh is used, eigh computes all eigenvectors and -values.
# So if number_of_dimensions was specified, we manually need to ensure
# only the requested number of dimensions
# (number of eigenvectors and eigenvalues, respectively) are returned.
eigvecs = eigvecs[:, :number_of_dimensions]
eigvals = eigvals[:number_of_dimensions]
proportion_explained = proportion_explained[:number_of_dimensions]
# Scale eigenvalues to have length = sqrt(eigenvalue). This
# works because np.linalg.eigh returns normalized
# eigenvectors. Each row contains the coordinates of the
# objects in the space of principal coordinates. Note that at
# least one eigenvalue is zero because only n-1 axes are
# needed to represent n points in a euclidean space.
coordinates = eigvecs * np.sqrt(eigvals)
proportion_explained = eigvals / eigvals.sum()
axis_labels = ['PC%d' % i for i in range(1, eigvals.size + 1)]
axis_labels = ["PC%d" % i for i in range(1, number_of_dimensions + 1)]
return OrdinationResults(
short_method_name='PCoA',
long_method_name='Principal Coordinate Analysis',
short_method_name="PCoA",
long_method_name=long_method_name,
eigvals=pd.Series(eigvals, index=axis_labels),
samples=pd.DataFrame(coordinates, index=distance_matrix.ids,
columns=axis_labels),
......@@ -144,6 +214,130 @@ def pcoa(distance_matrix):
index=axis_labels))
def _fsvd(centered_distance_matrix, number_of_dimensions=10):
"""
Performs singular value decomposition, or more specifically in
this case eigendecomposition, using fast heuristic algorithm
nicknamed "FSVD" (FastSVD), adapted and optimized from the algorithm
described by Halko et al (2011).
Parameters
----------
centered_distance_matrix : np.array
Numpy matrix representing the distance matrix for which the
eigenvectors and eigenvalues shall be computed
number_of_dimensions : int
Number of dimensions to keep. Must be lower than or equal to the
rank of the given distance_matrix.
Returns
-------
np.array
Array of eigenvectors, each with number_of_dimensions length.
np.array
Array of eigenvalues, a total number of number_of_dimensions.
Notes
-----
The algorithm is based on 'An Algorithm for the Principal
Component analysis of Large Data Sets'
by N. Halko, P.G. Martinsson, Y. Shkolnisky, and M. Tygert.
Original Paper: https://arxiv.org/abs/1007.5510
Ported from MATLAB implementation described here:
https://stats.stackexchange.com/a/11934/211065
"""
m, n = centered_distance_matrix.shape
# Number of levels of the Krylov method to use.
# For most applications, num_levels=1 or num_levels=2 is sufficient.
num_levels = 1
# Changes the power of the spectral norm, thus minimizing the error).
use_power_method = False
# Note: a (conjugate) transpose is removed for performance, since we
# only expect square matrices.
if m != n:
raise ValueError('FSVD expects square distance matrix')
if number_of_dimensions > m or number_of_dimensions > n:
raise ValueError('FSVD: number_of_dimensions cannot be larger than'
' the dimensionality of the given distance matrix.')
if number_of_dimensions < 0:
raise ValueError('Invalid operation: cannot reduce distance matrix '
'to negative dimensions using PCoA. Did you intend '
'to specify the default value "0", which sets '
'the number_of_dimensions equal to the '
'dimensionality of the given distance matrix?')
k = number_of_dimensions + 2
# Form a real nxl matrix G whose entries are independent, identically
# distributed Gaussian random variables of zero mean and unit variance
G = standard_normal(size=(n, k))
if use_power_method:
# use only the given exponent
H = dot(centered_distance_matrix, G)
for x in range(2, num_levels + 2):
# enhance decay of singular values
# note: distance_matrix is no longer transposed, saves work
# since we're expecting symmetric, square matrices anyway
# (Daniel McDonald's changes)
H = dot(centered_distance_matrix, dot(centered_distance_matrix, H))
else:
# compute the m x l matrices H^{(0)}, ..., H^{(i)}
# Note that this is done implicitly in each iteration below.
H = dot(centered_distance_matrix, G)
# to enhance performance
H = hstack(
(H,
dot(centered_distance_matrix, dot(centered_distance_matrix, H))))
for x in range(3, num_levels + 2):
tmp = dot(centered_distance_matrix,
dot(centered_distance_matrix, H))
H = hstack(
(H, dot(centered_distance_matrix,
dot(centered_distance_matrix, tmp))))
# Using the pivoted QR-decomposition, form a real m * ((i+1)l) matrix Q
# whose columns are orthonormal, s.t. there exists a real
# ((i+1)l) * ((i+1)l) matrix R for which H = QR
Q, R = qr(H)
# Compute the n * ((i+1)l) product matrix T = A^T Q
T = dot(centered_distance_matrix, Q) # step 3
# Form an SVD of T
Vt, St, W = svd(T, full_matrices=False)
W = W.transpose()
# Compute the m * ((i+1)l) product matrix
Ut = dot(Q, W)
U_fsvd = Ut[:, :number_of_dimensions]
S = St[:number_of_dimensions]
# drop imaginary component, if we got one
# Note:
# In cogent, after computing eigenvalues/vectors, the imaginary part
# is dropped, if any. We know for a fact that the eigenvalues are
# real, so that's not necessary, but eigenvectors can in principle
# be complex (see for example
# http://math.stackexchange.com/a/47807/109129 for details)
eigenvalues = S.real
eigenvectors = U_fsvd.real
return eigenvalues, eigenvectors
@experimental(as_of="0.5.3")
def pcoa_biplot(ordination, y):
"""Compute the projection of descriptors into a PCoA matrix
......
......@@ -196,3 +196,86 @@ def f_matrix(E_matrix):
col_means = E_matrix.mean(axis=0, keepdims=True)
matrix_mean = E_matrix.mean()
return E_matrix - row_means - col_means + matrix_mean
def center_distance_matrix(distance_matrix, inplace=False):
"""
Centers a distance matrix.
Note: If the used distance was euclidean, pairwise distances
needn't be computed from the data table Y because F_matrix =
Y.dot(Y.T) (if Y has been centered).
But since we're expecting distance_matrix to be non-euclidian,
we do the following computation as per
Numerical Ecology (Legendre & Legendre 1998).
Parameters
----------
distance_matrix : 2D array_like
Distance matrix.
inplace : bool, optional
Whether or not to center the given distance matrix in-place, which
is more efficient in terms of memory and computation.
"""
if inplace:
return _f_matrix_inplace(_e_matrix_inplace(distance_matrix))
else:
return f_matrix(e_matrix(distance_matrix))
def _e_matrix_inplace(distance_matrix):
"""
Compute E matrix from a distance matrix inplace.
Squares and divides by -2 the input element-wise. Eq. 9.20 in
Legendre & Legendre 1998.
Modified from :func:`skbio.stats.ordination.e_matrix` function,
performing row-wise operations to avoid excessive memory allocations.
Parameters
----------
distance_matrix : 2D array_like
Distance matrix.
"""
distance_matrix = distance_matrix.astype(np.float)
for i in np.arange(len(distance_matrix)):
distance_matrix[i] = (distance_matrix[i] * distance_matrix[i]) / -2
return distance_matrix
def _f_matrix_inplace(e_matrix):
"""
Compute F matrix from E matrix inplace.
Centering step: for each element, the mean of the corresponding
row and column are subtracted, and the mean of the whole
matrix is added. Eq. 9.21 in Legendre & Legendre 1998.
Modified from :func:`skbio.stats.ordination.f_matrix` function,
performing row-wise operations to avoid excessive memory allocations.
Parameters
----------
e_matrix : 2D array_like
A matrix representing the "E matrix" as described above.
"""
e_matrix = e_matrix.astype(np.float)
row_means = np.zeros(len(e_matrix), dtype=float)
col_means = np.zeros(len(e_matrix), dtype=float)
matrix_mean = 0.0
for i in np.arange(len(e_matrix)):
row_means[i] = e_matrix[i].mean()
matrix_mean += e_matrix[i].sum()
col_means += e_matrix[i]
matrix_mean /= len(e_matrix) ** 2
col_means /= len(e_matrix)
for i in np.arange(len(e_matrix)):
v = e_matrix[i]
v -= row_means[i]
v -= col_means
v += matrix_mean
e_matrix[i] = v
return e_matrix
1 2 3 4 5 6 7 8 9 10 11 12
1 0.0 0.909636950834479 0.9869428157422727 0.7190893636533138 0.6960053958431593 0.6477238596907942 0.30557161358653495 0.8946966124829346 0.12780699944110363 0.39915339691165386 0.7641239434153432 0.6248070796484706
2 0.909636950834479 0.0 0.3871354292850083 0.6881160960373599 0.5550593584027527 0.5855786600007656 0.4843215561734061 0.20448304199758327 0.4067028703340123 0.2754701840044086 0.6269219445617967 0.05629366991581264
3 0.9869428157422727 0.3871354292850083 0.0 0.6088130886750466 0.8611896463567201 0.2815827949525225 0.6500535832888426 0.8196046614443331 0.356410088497226 0.05164123821334954 0.7110953188954077 0.32855281988632934
4 0.7190893636533138 0.6881160960373599 0.6088130886750466 0.0 0.7453215102240474 0.9916540031629704 0.14394284428694282 0.8388378539413649 0.15115603934799038 0.13871462268568635 0.1934605692727246 0.9804118301943398
5 0.6960053958431593 0.5550593584027527 0.8611896463567201 0.7453215102240474 0.0 0.7996611932937304 0.30579824243478326 0.5227960305398314 0.8564730629853469 0.7786384040043949 0.06843106040159719 0.7715973816221341
6 0.6477238596907942 0.5855786600007656 0.2815827949525225 0.9916540031629704 0.7996611932937304 0.0 0.8869204659721949 0.1619378942802252 0.10200764546980268 0.17805335055828198 0.8796559972720953 0.20933243431218862
7 0.30557161358653495 0.4843215561734061 0.6500535832888426 0.14394284428694282 0.30579824243478326 0.8869204659721949 0.0 0.9489770186788868 0.9531210051205121 0.5834385348164726 0.31984891724102216 0.5852822100268925
8 0.8946966124829346 0.20448304199758327 0.8196046614443331 0.8388378539413649 0.5227960305398314 0.1619378942802252 0.9489770186788868 0.0 0.16681381452925792 0.9281238242741929 0.604480007052297 0.43978806925866687
9 0.12780699944110363 0.4067028703340123 0.356410088497226 0.15115603934799038 0.8564730629853469 0.10200764546980268 0.9531210051205121 0.16681381452925792 0.0 0.10766387594368387 0.9552101788877516 0.6135541732435132
10 0.39915339691165386 0.2754701840044086 0.05164123821334954 0.13871462268568635 0.7786384040043949 0.17805335055828198 0.5834385348164726 0.9281238242741929 0.10766387594368387 0.0 0.4157921207508062 0.31997143485314194
11 0.7641239434153432 0.6269219445617967 0.7110953188954077 0.1934605692727246 0.06843106040159719 0.8796559972720953 0.31984891724102216 0.604480007052297 0.9552101788877516 0.4157921207508062 0.0 0.11189976154475101
12 0.6248070796484706 0.05629366991581264 0.32855281988632934 0.9804118301943398 0.7715973816221341 0.20933243431218862 0.5852822100268925 0.43978806925866687 0.6135541732435132 0.31997143485314194 0.11189976154475101 0.0
Eigvals 3
0.5123672604605051 0.30071909442702155 0.26791206600414047
Proportion explained 3
0.26757383277657976 0.15704469604990076 0.13991186377402365
Species 0 0
Site 9 3
PC.636 -0.2584654611828421 0.17399954688273872 -0.03828757925519412
PC.635 -0.27100113539100934 -0.018595131906339258 0.08648419263485663
PC.356 0.23507789817473093 0.09625192544887005 0.34579272671386985
PC.481 0.026140766432533755 -0.011145967653319655 -0.14766060301460787
PC.354 0.2850075522831216 -0.019254988848331687 -0.062326337538532166
PC.593 0.20463632624145514 -0.13936115093164073 -0.29151381962286704
PC.355 0.23348240321199026 0.22525797406849954 0.018862309626814944
PC.607 -0.09496319113225934 -0.4209748024953033 0.15486945486941445
PC.634 -0.35991515863772167 0.11382259543482587 -0.06622034441375392
Biplot 0 0
Site constraints 0 0
Eigvals 3
0.5123672604605054 0.3007190944270222 0.2679120660041405
Proportion explained 3
0.2675738327765797 0.15704469604990098 0.13991186377402356
Species 0 0
Site 9 3
PC.636 -0.2584654611828421 0.17399954688273822 -0.03828757925519378
PC.635 -0.2710011353910087 -0.018595131906339074 0.08648419263485721
PC.356 0.23507789817473107 0.0962519254488692 0.3457927267138707
PC.481 0.026140766432533644 -0.011145967653318808 -0.14766060301460796
PC.354 0.28500755228312136 -0.019254988848331694 -0.06232633753853236
PC.593 0.20463632624145525 -0.13936115093164014 -0.2915138196228672
PC.355 0.23348240321199093 0.22525797406849954 0.018862309626815132
PC.607 -0.09496319113225955 -0.4209748024953044 0.1548694548694131
PC.634 -0.35991515863772144 0.1138225954348267 -0.06622034441375402
Biplot 0 0
Site constraints 0 0
......@@ -6,9 +6,9 @@
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------
import pandas as pd
import numpy as np
import numpy.testing as npt
import pandas as pd
from copy import deepcopy
from unittest import TestCase, main
......@@ -53,6 +53,73 @@ class TestPCoA(TestCase):
assert_ordination_results_equal(results, expected_results,
ignore_directionality=True)
def test_fsvd_inplace(self):
dm1 = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
dm2 = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
expected_results = pcoa(dm1, method="eigh", number_of_dimensions=3,
inplace=True)
results = pcoa(dm2, method="fsvd", number_of_dimensions=3,
inplace=True)
assert_ordination_results_equal(results, expected_results,
ignore_directionality=True,
ignore_method_names=True)
def test_fsvd(self):
dm1 = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
dm2 = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
dm3 = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
# Test eigh vs. fsvd pcoa and inplace parameter
expected_results = pcoa(dm1, method="eigh", number_of_dimensions=3,
inplace=False)
results = pcoa(dm2, method="fsvd", number_of_dimensions=3,
inplace=False)
results_inplace = pcoa(dm2, method="fsvd", number_of_dimensions=3,
inplace=True)
assert_ordination_results_equal(results, expected_results,
ignore_directionality=True,
ignore_method_names=True)
assert_ordination_results_equal(results, results_inplace,
ignore_directionality=True,
ignore_method_names=True)
# Test number_of_dimensions edge cases
results2 = pcoa(dm3, method="fsvd", number_of_dimensions=0,
inplace=False)
expected_results2 = pcoa(dm3, method="fsvd",
number_of_dimensions=dm3.data.shape[0],
inplace=False)
assert_ordination_results_equal(results2, expected_results2,
ignore_directionality=True,
ignore_method_names=True)
with self.assertRaises(ValueError):
dim_too_large = dm1.data.shape[0] + 10
pcoa(dm2, method="fsvd", number_of_dimensions=dim_too_large)
with self.assertRaises(ValueError):
pcoa(dm2, method="fsvd", number_of_dimensions=-1)
with self.assertRaises(ValueError):
dim_too_large = dm1.data.shape[0] + 10
pcoa(dm2, method="eigh", number_of_dimensions=dim_too_large)
with self.assertRaises(ValueError):
pcoa(dm2, method="eigh", number_of_dimensions=-1)
dm_big = DistanceMatrix.read(get_data_path('PCoA_sample_data_12dim'))
with self.assertWarnsRegex(RuntimeWarning,
"no value for number_of_dimensions"):
pcoa(dm_big, method="fsvd", number_of_dimensions=0)
def test_extensive(self):
eigvals = [0.3984635, 0.36405689, 0.28804535, 0.27479983,
0.19165361, 0.0]
......@@ -203,7 +270,8 @@ class TestPCoABiplot(TestCase):
self.descriptors.index = pd.Index(new_index)
with self.assertRaisesRegex(ValueError, 'The eigenvectors and the '
'descriptors must describe the same '
'descriptors must describe '
'the same '
'samples.'):
pcoa_biplot(self.ordination, self.descriptors)
......@@ -211,7 +279,8 @@ class TestPCoABiplot(TestCase):
self.ordination.short_method_name = 'RDA'
self.ordination.long_method_name = 'Redundancy Analysis'
with self.assertRaisesRegex(ValueError, 'This biplot computation can'
' only be performed in a PCoA matrix.'):
' only be performed in a '
'PCoA matrix.'):
pcoa_biplot(self.ordination, self.descriptors)
def test_from_seralized_results(self):
......
......@@ -6,12 +6,16 @@
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------
from unittest import TestCase, main
import copy
import numpy as np
import numpy.testing as npt
from unittest import TestCase, main
from skbio.stats.ordination import corr, mean_and_std, e_matrix, f_matrix, \
center_distance_matrix
from skbio.stats.ordination import corr, mean_and_std, e_matrix, f_matrix
from skbio.stats.ordination._utils import _e_matrix_inplace, _f_matrix_inplace
class TestUtils(TestCase):
......@@ -61,6 +65,37 @@ class TestUtils(TestCase):
# Note that `test_make_F_matrix` in cogent is wrong
npt.assert_almost_equal(F, expected_F)
def test_e_matrix_inplace(self):
E = _e_matrix_inplace(self.matrix)
expected_E = np.array([[-0.5, -2., -4.5],
[-8., -12.5, -18.]])
npt.assert_almost_equal(E, expected_E)
def test_f_matrix_inplace(self):
F = _f_matrix_inplace(self.matrix2)
expected_F = np.zeros((3, 3))
npt.assert_almost_equal(F, expected_F)
def test_center_distance_matrix_inplace(self):
dm_expected = f_matrix(e_matrix(self.small_mat))
# make copy of matrix to test inplace centering
matrix_copy = copy.deepcopy(self.small_mat)
dm_centered = center_distance_matrix(matrix_copy, inplace=False)
# ensure that matrix_copy was NOT modified inplace
self.assertTrue(np.array_equal(matrix_copy, self.small_mat))
# and ensure that the result of centering was correct
npt.assert_almost_equal(dm_expected, dm_centered)
# next, sort same matrix inplace
matrix_copy2 = copy.deepcopy(self.small_mat)
dm_centered_inp = center_distance_matrix(matrix_copy2, inplace=True)
# and ensure that the result of inplace centering was correct
npt.assert_almost_equal(dm_expected, dm_centered_inp)
if __name__ == '__main__':
main()
......@@ -6,9 +6,9 @@
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------
import os
import inspect
import warnings
import inspect
import os
import nose
import numpy as np
......@@ -80,6 +80,7 @@ class TestRunner:
and ugly. This class invokes nose with the required options.
"""
@experimental(as_of="0.4.0")
def __init__(self, filename):
self._filename = filename
......