# License: MIT
import warnings

import numpy as np
from sklearn.metrics import euclidean_distances

from .base import BaseEmbed
from ..utils.utils import check_Xs
class MVMDS(BaseEmbed):
r"""
An implementation of Classical Multiview Multidimensional Scaling for
jointly reducing the dimensions of multiple views of data [#1MVMDS]_.
A Euclidean distance matrix is created for each view, double centered,
and the k largest common eigenvectors between the matrices are found
based on the stepwise estimation of common principal components. Using
these common principal components, the views are jointly reduced and
a single view of k-dimensions is returned.
MVMDS is often a better alternative to PCA for multi-view data.
See the ``tutorials`` in the documentation.
Parameters
----------
    n_components : int (positive), default=2
        Number of components to return from the algorithm. This value
        must be greater than 0 and at most the number of samples within
        each view.
    num_iter : int (positive), default=15
        Number of iterations the stepwise estimation runs for.
dissimilarity : {'euclidean', 'precomputed'}, default='euclidean'
Dissimilarity measure to use:
'euclidean':
Pairwise Euclidean distances between points in the dataset.
'precomputed':
Xs is treated as pre-computed dissimilarity matrices.
Attributes
----------
components_: numpy.ndarray, shape(n_samples, n_components)
Joint transformed MVMDS components of the input views.
Notes
-----
    Classical Multiview Multidimensional Scaling can be broken down into two
    steps. The first step involves calculating the squared Euclidean
    distance matrices, :math:`Z_i`, for each of the :math:`k` views and
    double-centering these matrices through the following calculations:
.. math::
\Sigma_{i}=-\frac{1}{2}J_iZ_iJ_i
    .. math::
        \text{where } J_i=I-\frac{1}{n}\mathbb{1}\mathbb{1}^T
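
    For example, this double-centering can be reproduced directly with
    NumPy (a minimal sketch of what ``fit`` computes internally for one
    view):

    >>> import numpy as np
    >>> from sklearn.metrics import euclidean_distances
    >>> X = np.random.rand(10, 3)  # one view: 10 samples, 3 features
    >>> Z = euclidean_distances(X) ** 2  # squared pairwise distances
    >>> J = np.eye(10) - np.ones((10, 10)) / 10  # centering matrix
    >>> Sigma = -0.5 * J @ Z @ J  # double-centered view
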
The second step involves finding the common principal components of the
:math:`\Sigma` matrices. These can be thought of as multiview
generalizations of the principal components found in principal component
analysis (PCA) given several covariance matrices. The central hypothesis of
the common principal component model states that given k normal populations
(views), their :math:`p` x :math:`p` covariance matrices
:math:`\Sigma_{i}`, for :math:`i = 1,2,...,k` are simultaneously
diagonalizable as:
.. math::
\Sigma_{i} = QD_i^2Q^T
where :math:`Q` is the common :math:`p` x :math:`p` orthogonal matrix and
:math:`D_i^2` are positive :math:`p` x :math:`p` diagonal matrices. The
:math:`Q` matrix contains all the common principal components. The common
principal component, :math:`q_j`, is found by solving the minimization
problem:
.. math::
        \text{Minimize} \sum_{i=1}^{k}n_i\log(q_j^TS_iq_j)
.. math::
\text{Subject to } q_j^Tq_j = 1
    where :math:`n_i` represents the degrees of freedom of the :math:`i`-th
    view and :math:`S_i` its sample covariance matrix.
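
    In the implementation below, each component is found by a fixed-point
    iteration on this problem (a paraphrase of the stepwise algorithm in
    [#1MVMDS]_): the candidate :math:`q_j` is repeatedly updated as

    .. math::
        w = \Pi\left(\sum_{i=1}^{k}\frac{n_i}{q_j^TS_iq_j}S_i\right)q_j,
        \qquad q_j \leftarrow \frac{w}{\sqrt{w^Tw}}

    where :math:`\Pi = I - \sum_{l<j} q_lq_l^T` projects onto the orthogonal
    complement of the components already found.
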
This class does not support ``MVMDS.transform()`` due to the iterative
nature of the algorithm and the fact that the transformation is done
during iterative fitting. Use ``MVMDS.fit_transform()`` to do both
fitting and transforming at once.
Examples
--------
>>> from mvlearn.embed import MVMDS
>>> from mvlearn.datasets import load_UCImultifeature
>>> Xs, _ = load_UCImultifeature()
    >>> print(len(Xs))  # number of views
    6
    >>> print(Xs[0].shape)  # shape of the first view
    (2000, 76)
>>> mvmds = MVMDS(n_components=5)
>>> Xs_reduced = mvmds.fit_transform(Xs)
>>> print(Xs_reduced.shape)
(2000, 5)
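
    MVMDS also accepts precomputed dissimilarity matrices. A sketch of this
    usage, passing each view as a square ``(n_samples, n_samples)`` distance
    matrix:

    >>> from sklearn.metrics import euclidean_distances
    >>> Ds = [euclidean_distances(X) for X in Xs]
    >>> mvmds_pre = MVMDS(n_components=5, dissimilarity='precomputed')
    >>> print(mvmds_pre.fit_transform(Ds).shape)
    (2000, 5)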
References
----------
.. [#1MVMDS] Trendafilov, Nickolay T. “Stepwise Estimation of Common
Principal Components.” Computational Statistics & Data
Analysis, 54(12):3446–3457, 2010
.. [#2MVMDS] Kanaan-Izquierdo, Samir, et al. "Multiview: a software
package for multiview pattern recognition methods." Bioinformatics,
35(16):2877–2879, 2019
"""
def __init__(self, n_components=2, num_iter=15, dissimilarity='euclidean'):
super().__init__()
self.components_ = None
self.n_components = n_components
self.num_iter = num_iter
self.dissimilarity = dissimilarity
        if self.num_iter <= 0:
            raise ValueError('The number of iterations must be greater '
                             + 'than 0')
        if self.n_components <= 0:
            raise ValueError('The number of components must be greater than '
                             + '0 and at most the number of samples in '
                             + 'each view')
        if self.dissimilarity not in ['euclidean', 'precomputed']:
            raise ValueError('The parameter `dissimilarity` must be one of '
                             + '{`euclidean`, `precomputed`}')
def _commonpcs(self, Xs):
"""
        Finds the stepwise estimates of the common principal components,
        following Trendafilov's algorithm described in:
        https://www.sciencedirect.com/science/article/pii/S016794731000112X
Parameters
----------
Xs: List of array-likes or numpy.ndarray
- Xs length: n_views
- Xs[i] shape: (n_samples, n_features_i)
Returns
-------
components: numpy.ndarray, shape(n_samples, n_components)
Joint transformed MVMDS components of the input views.
"""
n = p = Xs.shape[1]
views = len(Xs)
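        # Each view is given the uniform weight n/(k*n) = 1/k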
n_num = np.array([n] * views)/np.sum(np.array([n] * views))
components = np.zeros((p, self.n_components))
        # pi projects onto the orthogonal complement of the components
        # found so far; initialized to the identity, as in the paper
        pi = np.eye(p)
s = np.zeros((p, p))
for i in np.arange(views):
s = s + (n_num[i] * Xs[i])
_, e2 = np.linalg.eigh(s)
        # eigh returns ascending eigenvalues; reverse the columns so the
        # eigenvectors are ordered by decreasing eigenvalue
        q0 = e2[:, ::-1]
for i in np.arange(self.n_components):
            # q is initialized to the i-th leading eigenvector
q = q0[:, i]
q = np.array(q).reshape(len(q), 1)
d = np.zeros((1, views))
for j in np.arange(views):
# Represents mu from the paper.
d[:, j] = np.dot(np.dot(q.T, Xs[j]), q)
# stepwise iterations
for j in np.arange(self.num_iter):
s2 = np.zeros((p, p))
for yy in np.arange(views):
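                    # d2 equals n, the per-view weight n_i from the paper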
d2 = n_num[yy] * np.sum(np.array([n] * views))
                    # Substitute a small constant to avoid division by zero
if d[:, yy] == 0:
s2 = s2 + (d2 * Xs[yy] / .0001)
else:
# Refers to d value from previous iteration
s2 = s2 + (d2 * Xs[yy] / d[:, yy])
                # Apply the pooled reweighted matrix to q, then project
                # out the components already found
                w = np.dot(s2, q)
                w = np.dot(pi, w)
q = w / np.sqrt(np.dot(w.T, w))
for yy in np.arange(views):
d[:, yy] = np.dot(np.dot(q.T, Xs[yy]), q)
            # Store the converged component
            components[:, i] = q[:, 0]
# initializes pi for next iteration
pi = pi - np.dot(q, q.T)
        return components
def fit(self, Xs, y=None):
"""
        Fits the model by computing the Euclidean distance matrix of each
        view, double centering it, and finding the common principal
        components across views via ``_commonpcs``. Works analogously to
        traditional single-view Multidimensional Scaling.
Parameters
----------
Xs: list of array-likes or numpy.ndarray
- Xs length: n_views
- Xs[i] shape: (n_samples, n_features_i)
y : ignored
Included for API compliance.
"""
if (self.n_components) > len(Xs[0]):
self.n_components = len(Xs[0])
            warnings.warn('The number of components requested is greater '
                          + 'than the number of samples in the dataset. '
                          + str(self.n_components)
                          + ' components will be computed instead.')
Xs = check_Xs(Xs, multiview=True)
mat = np.ones(shape=(len(Xs), len(Xs[0]), len(Xs[0])))
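        # mat[i] will hold the double-centered (Gram) matrix of view i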
# Double centering each view as in single-view MDS
if (self.dissimilarity == 'euclidean'):
for i in np.arange(len(Xs)):
view = euclidean_distances(Xs[i])
view_squared = np.power(np.array(view), 2)
J = np.eye(len(view)) - (1/len(view))*np.ones(view.shape)
B = -(1/2) * J @ view_squared @ J
mat[i] = B
        # The user supplied precomputed dissimilarity matrices
elif (self.dissimilarity == 'precomputed'):
for i in np.arange(len(Xs)):
if (Xs[i].shape[0] != Xs[i].shape[1]):
raise ValueError('The input distance matrix must be '
+ 'a square matrix')
else:
view = Xs[i]
view_squared = np.power(np.array(view), 2)
J = np.eye(len(view)) - (1/len(view))*np.ones(view.shape)
B = -(1/2) * J @ view_squared @ J
mat[i] = B
        else:
            raise ValueError('The parameter `dissimilarity` must be one of '
                             + '{`euclidean`, `precomputed`}')
self.components_ = self._commonpcs(mat)
return self
def fit_transform(self, Xs, y=None):
""""
Embeds data matrix(s) using fitted projection matrices
Parameters
----------
Xs: list of array-likes or numpy.ndarray
- Xs length: n_views
- Xs[i] shape: (n_samples, n_features_i)
The data to embed based on the fit function.
y : ignored
Included for API compliance.
Returns
-------
X_transformed: numpy.ndarray, shape(n_samples, n_components)
Joint transformed MVMDS components of the input views.
"""
Xs = check_Xs(Xs)
self.fit(Xs)
return self.components_