Source code for mvlearn.compose.wrap

"""Singleview function wrapping utilities."""

# Authors: Pierre Ablin, Ronan Perry
#
# License: MIT

from scipy import stats
from sklearn.base import clone, BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.metrics import accuracy_score
from ..utils import check_Xs, check_Xs_y
import numpy as np


class BaseWrapper(BaseEstimator):
    """Wraps an sklearn-compliant estimator for use on multiple views.

    Parameters
    ----------
    base_estimator : estimator instance, or list of estimator instances
        Either a single estimator, which is cloned once per view at fit
        time, or a list holding one estimator per view. A list is used
        as-is (not cloned), so fitting the wrapper fits the very objects
        that were passed in.

    Attributes
    ----------
    n_views_ : int
        Number of views seen during the last fit.

    estimators_ : list of length n_views_
        The per-view estimators.
    """
    def __init__(self, base_estimator):
        self.base_estimator = base_estimator

    def _prefit(self, Xs, y=None):
        r"""Estimate the attributes of the class.

        Validates the input, records the number of views and builds the
        list of per-view estimators. No estimator is fitted here.

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
            The data to fit to.

        y : array-like of length (n_samples,), optional (default None)
            Targets for a supervised estimation task

        Returns
        -------
        self : object
            Returns the instance itself.

        Raises
        ------
        ValueError
            If ``base_estimator`` is a list whose length differs from the
            number of views.
        """
        if y is None:
            Xs = check_Xs(Xs)
        else:
            Xs, y = check_Xs_y(Xs, y)
        self.n_views_ = len(Xs)

        if isinstance(self.base_estimator, list):
            if len(self.base_estimator) != self.n_views_:
                raise ValueError(
                    "The length of the estimators should be the same as the "
                    "number of views"
                )
            # The user-supplied estimators are kept by reference, not cloned.
            self.estimators_ = self.base_estimator
        else:
            # One independent, unfitted copy of the estimator per view.
            self.estimators_ = [
                clone(self.base_estimator) for _ in range(self.n_views_)
            ]
        return self

    def fit(self, Xs, y=None):
        r"""Fit each estimator to the data.

        The i-th estimator is fitted on the i-th view, every estimator
        receiving the same ``y``.

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
            The data to fit to.

        y : array-like of length (n_samples,), optional (default None)
            Targets for a supervised estimation task

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        self._prefit(Xs, y)
        for estimator, X in zip(self.estimators_, Xs):
            estimator.fit(X, y)
        return self


class ViewClassifier(BaseWrapper):
    r"""Apply a sklearn classifier to each view of a dataset

    Build a classifier from multiview data by using one
    or more individual scikit-learn classifiers on each view.

    Parameters
    ----------
    base_estimator : a sklearn classifier instance, or a list
        Either a single sklearn classifier that will be applied to each
        view. One clone of the estimator will correspond to each view.
        Otherwise, it should be a list of estimators, of length the number of
        views in the multiview dataset.

    Attributes
    ----------
    n_views_ : int
        The number of views in the input dataset

    estimators_ : list of objects of length n_views_
        The list of classifiers used to predict data labels. If
        self.base_estimator is a single estimator, this is a list containing
        clones of that estimator, otherwise it is one view of
        self.base_estimator.

    Examples
    --------
    >>> from mvlearn.datasets import load_UCImultifeature
    >>> from mvlearn.compose import ViewClassifier
    >>> from sklearn.linear_model import LogisticRegression
    >>> Xs, y = load_UCImultifeature()
    >>> clfs = ViewClassifier(LogisticRegression())
    >>> y_hat = clfs.fit(Xs, y).predict(Xs)
    >>> print(y_hat.shape)
    (2000,)
    """

    def predict(self, Xs):
        """
        Return the predicted class labels using majority vote of the
        predictions from each view.

        When several labels receive the same number of votes for a sample,
        the one that sorts lowest is returned.

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
            The data to predict

        Returns
        -------
        y_hat : array-like of shape (n_samples,)
            Predicted class labels for each sample

        Raises
        ------
        ValueError
            If the number of views differs from the number seen in fit.
        """
        check_is_fitted(self)
        Xs, n_views, _, _ = check_Xs(Xs, return_dimensions=True)
        if n_views != self.n_views_:
            raise ValueError(
                f"Multiview input data must have {self.n_views_} views")

        # Per-view predictions stacked as (n_views, n_samples).
        ys = np.asarray(
            [clf.predict(X) for clf, X in zip(self.estimators_, Xs)]
        )

        # Encode labels as integer codes so the vote works for any sortable
        # label dtype (including strings) and always yields a 1-D result,
        # even for a single sample.
        classes, codes = np.unique(ys, return_inverse=True)
        codes = codes.reshape(ys.shape)

        # votes[c, j] = number of views predicting class c for sample j.
        class_ids = np.arange(len(classes))[:, np.newaxis, np.newaxis]
        votes = (codes[np.newaxis] == class_ids).sum(axis=1)

        # argmax keeps the first maximum, i.e. the lowest-sorted label on ties.
        return classes[votes.argmax(axis=0)]

    def score(self, Xs, y, sample_weight=None):
        """
        Return the mean accuracy on the given test data and labels.

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
            The data to predict

        y : array-like of shape (n_samples,)
            True labels for X.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            Mean accuracy of self.predict(Xs) w.r.t. y
        """
        return accuracy_score(y, self.predict(Xs), sample_weight=sample_weight)


class ViewTransformer(BaseWrapper, TransformerMixin):
    r"""Apply a sklearn transformer to each view of a dataset

    Build a transformer from multiview dataset to multiview dataset by
    using one or more individual scikit-learn transformers on each view.

    Parameters
    ----------
    base_estimator : a sklearn transformer instance, or a list
        Either a single sklearn transformer that will be applied to each
        view. One clone of the estimator will correspond to each view.
        Otherwise, it should be a list of estimators, of length the number of
        views in the multiview dataset.

    Attributes
    ----------
    n_views_ : int
        The number of views in the input dataset

    estimators_ : list of objects of length n_views_
        The list of transformers used to transform data. If
        self.base_estimator is a single transformer, it is a list containing
        clones of that transformer, otherwise it is a view of
        self.base_estimator.

    Examples
    --------
    >>> from mvlearn.datasets import load_UCImultifeature
    >>> from mvlearn.compose import ViewTransformer
    >>> from sklearn.decomposition import PCA
    >>> Xs, _ = load_UCImultifeature()
    >>> repeat = ViewTransformer(PCA(n_components=2))
    >>> Xs_transformed = repeat.fit_transform(Xs)
    >>> print(len(Xs_transformed))
    6
    >>> print(Xs_transformed[0].shape)
    (2000, 2)
    """

    def _resolve_estimators(self, Xs, index):
        """Validate ``Xs`` and pair each input view with a fitted estimator.

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            The input views.

        index : int, array-like or None
            Indices of the fitted views the inputs correspond to; None
            selects every fitted view in order.

        Returns
        -------
        estimators : list
            The fitted estimators selected by ``index``.

        Xs : list
            The validated views, as returned by ``check_Xs``.

        Raises
        ------
        ValueError
            If the number of indices differs from the number of views.
        """
        # Must come first: n_views_ and estimators_ only exist once fitted.
        check_is_fitted(self)
        Xs = check_Xs(Xs)

        if index is None:
            index_ = np.arange(self.n_views_)
        else:
            index_ = np.atleast_1d(index)

        # An explicit error (rather than an assert, which -O strips) keeps
        # zip() from silently dropping unmatched views.
        if len(index_) != len(Xs):
            raise ValueError(
                f"Got {len(Xs)} views but {len(index_)} view indices; "
                "they must match"
            )
        return [self.estimators_[i] for i in index_], Xs

    def transform(self, Xs, index=None):
        r"""Transform each dataset

        Applies the transform of each transformer on the
        individual views.

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
            The input data.

        index: int or array-like, default=None
            The index or list of indices of the fitted views to which the
            inputted views correspond. If None, there should be as many
            inputted views as the fitted views and in the same order.
            Note that the index parameter is not available in all methods of
            mvlearn yet.

        Returns
        -------
        Xs_transformed : list of array-likes
            List of length n_views.
            The transformed data.

        Raises
        ------
        ValueError
            If the number of views does not match the number of indices.
        """
        estimators, Xs = self._resolve_estimators(Xs, index)
        return [est.transform(X) for est, X in zip(estimators, Xs)]

    def fit_transform(self, Xs, y=None):
        r"""Fit and transform each dataset

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
            The data to fit to.

        y : numpy.ndarray of shape (n_samples,), optional (default None)
            Target values if a supervised transformation.

        Returns
        -------
        Xs_transformed : list of array-likes
            List of length n_views.
            The transformed data.
        """
        self._prefit(Xs, y)
        return [
            estimator.fit_transform(X, y)
            for estimator, X in zip(self.estimators_, Xs)
        ]

    def inverse_transform(self, Xs, index=None):
        r"""Compute the inverse transform of a dataset

        Applies the inverse_transform function of each
        transformer on the individual datasets

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
            The input data.

        index: int or array-like, default=None
            The index or list of indices of the fitted views to which the
            inputted views correspond. If None, there should be as many
            inputted views as the fitted views and in the same order.
            Note that the index parameter is not available in all methods of
            mvlearn yet.

        Returns
        -------
        Xs_transformed : list of array-likes
            List of length n_views.
            The transformed data.

        Raises
        ------
        ValueError
            If the number of views does not match the number of indices.
        """
        estimators, Xs = self._resolve_estimators(Xs, index)
        return [est.inverse_transform(X) for est, X in zip(estimators, Xs)]