Source code for mvlearn.compose.wrap

"""Singleview function wrapping utilities."""

# Authors: Pierre Ablin, Ronan Perry
#
# License: MIT

from scipy import stats
from sklearn.base import clone, BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.metrics import accuracy_score
from ..utils import check_Xs, check_Xs_y
import numpy as np


class BaseWrapper(BaseEstimator):
    """Wraps an sklearn-compliant estimator for use on multiple views"""
    def __init__(self, base_estimator):
        self.base_estimator = base_estimator

    def _prefit(self, Xs, y=None):
        r"""Estimate the attributes of the class.

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
            The data to fit to.

        y : array-like of length (n_samples,), optional (default None)
            Targets for a supervised estimation task

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        if y is None:
            Xs = check_Xs(Xs)
        else:
            Xs, y = check_Xs_y(Xs, y)
        self.n_views_ = len(Xs)
        if type(self.base_estimator) is list:
            if len(self.base_estimator) != self.n_views_:
                raise ValueError(
                    "The length of the estimators should be the same as the"
                    "number of views"
                )
            self.estimators_ = self.base_estimator
        else:
            self.estimators_ = [
                clone(self.base_estimator) for _ in range(self.n_views_)
            ]
        return self

    def fit(self, Xs, y=None):
        r"""Fit each estimator to the data.

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
            The data to fit to.

        y : array-like of length (n_samples,), optional (default None)
            Targets for a supervised estimation task

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        self._prefit(Xs, y)
        for estimator, X in zip(self.estimators_, Xs):
            estimator.fit(X, y)
        return self


[docs]class ViewClassifier(BaseWrapper):
    r"""Apply a sklearn classifier to each view of a dataset

    Build a classifier from multiview data by using one
    or more individual scikit-learn classifiers on each view.

    Parameters
    ----------
    base_estimator : a sklearn classifier instance, or a list
        Either a single sklearn classifier that will be applied to each
        view. One clone of the estimator will correspond to each view.
        Otherwise, it should be a list of estimators, of length the number of
        views in the multiview dataset.

    Attributes
    ----------
    n_views_ : int
        The number of views in the input dataset

    estimators_ : list of objects of length n_views_
        The list of classifiers used to predict data labels. If
        self.base_estimator is a single estimator, this is a list containing
        clones of that estimator, otherwise it is one view of
        self.base_estimator.

    Examples
    --------
    >>> from mvlearn.datasets import load_UCImultifeature
    >>> from mvlearn.compose import ViewClassifier
    >>> from sklearn.linear_model import LogisticRegression
    >>> Xs, y = load_UCImultifeature()
    >>> clfs = ViewClassifier(LogisticRegression())
    >>> y_hat = clfs.fit(Xs, y).predict(Xs)
    >>> print(y_hat.shape)
    (2000,)
    """
    def predict(self, Xs):
        """
        Return the predicted class labels using majority vote of the
        predictions from each view.

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
            The data to predict

        Returns
        -------
        y_hat : array-like of shape (n_samples,)
            Predicted class labels for each sample
        """
        check_is_fitted(self)
        Xs, n_views, _, _ = check_Xs(Xs, return_dimensions=True)
        if n_views != self.n_views_:
            raise ValueError(
                f"Multiview input data must have {self.n_views_} views")
        ys = [clf.predict(X) for clf, X in zip(self.estimators_, Xs)]
        return stats.mode(ys, axis=0)[0].squeeze()

    def score(self, Xs, y, sample_weight=None):
        """
        Return the mean accuracy on the given test data and labels.

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
            The data to predict

        y : array-like of shape (n_samples,)
            True labels for X.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            Mean accuracy of self.predict(Xs) w.r.t. y
        """
        return accuracy_score(y, self.predict(Xs), sample_weight=sample_weight)


[docs]class ViewTransformer(BaseWrapper, TransformerMixin):
    r"""Apply a sklearn transformer to each view of a dataset

    Build a transformer from multiview dataset to multiview dataset by
    using one or more individual scikit-learn transformers on each view.

    Parameters
    ----------
    base_estimator : a sklearn transformer instance, or a list
        Either a single sklearn transformer that will be applied to each
        view. One clone of the estimator will correspond to each view.
        Otherwise, it should be a list of estimators, of length the number of
        views in the multiview dataset.

    Attributes
    ----------
    n_views_ : int
        The number of views in the input dataset

    estimators_ : list of objects of length n_views_
        The list of transformers used to transform data. If
        self.base_estimator is a single transformer, it is a list containing
        clones of that transformer, otherwise it is a view of
        self.base_estimator.

    Examples
    --------
    >>> from mvlearn.datasets import load_UCImultifeature
    >>> from mvlearn.compose import ViewTransformer
    >>> from sklearn.decomposition import PCA
    >>> Xs, _ = load_UCImultifeature()
    >>> repeat = ViewTransformer(PCA(n_components=2))
    >>> Xs_transformed = repeat.fit_transform(Xs)
    >>> print(len(Xs_transformed))
    6
    >>> print(Xs_transformed[0].shape)
    (2000, 2)
    """

    def transform(self, Xs, index=None):
        r"""Transform each dataset

        Applies the transform of each transformer on the
        individual views.

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
            The input data.

        index: int or array-like, default=None
            The index or list of indices of the fitted views to which the
            inputted views correspond. If None, there should be as many
            inputted views as the fitted views and in the same order.
            Note that the index parameter is not available in all methods of
            mvlearn yet.

        Returns
        -------
        Xs_transformed : list of array-likes
            List of length n_views.
            The transformed data.
        """
        if index is None:
            index_ = np.arange(self.n_views_)
        else:
            index_ = np.copy(index)
            index_ = np.atleast_1d(index_)

        assert len(index_) == len(Xs)

        check_is_fitted(self)
        Xs = check_Xs(Xs)
        Xs_transformed = []
        for estimator, X in zip([self.estimators_[i] for i in index_], Xs):
            Xs_transformed.append(estimator.transform(X))
        return Xs_transformed

    def fit_transform(self, Xs, y=None):
        r"""Fit and transform each dataset

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
            The data to fit to.

        y : numpy.ndarray of shape (n_samples,), optional (default None)
            Target values if a supervised transformation.

        Returns
        -------
        Xs_transformed : list of array-likes
            List of length n_views.
            The transformed data.
        """
        self._prefit(Xs, y)
        Xs_transformed = []
        for estimator, X in zip(self.estimators_, Xs):
            Xs_transformed.append(estimator.fit_transform(X, y))
        return Xs_transformed

    def inverse_transform(self, Xs, index=None):
        r"""Compute the inverse transform of a dataset

        Applies the inverse_transform function of each
        transformer on the individual datasets

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
            The input data.

        index: int or array-like, default=None
            The index or list of indices of the fitted views to which the
            inputted views correspond. If None, there should be as many
            inputted views as the fitted views and in the same order.
            Note that the index parameter is not available in all methods of
            mvlearn yet.

        Returns
        -------
        Xs_transformed : list of array-likes
            List of length n_views.
            The transformed data.
        """
        check_is_fitted(self)

        if index is None:
            index_ = np.arange(self.n_views_)
        else:
            index_ = np.copy(index)
            index_ = np.atleast_1d(index_)

        assert len(index_) == len(Xs)
        Xs = check_Xs(Xs)
        Xs_transformed = []
        for estimator, X in zip([self.estimators_[i] for i in index_], Xs):
            Xs_transformed.append(estimator.inverse_transform(X))
        return Xs_transformed