Source code for mvlearn.compose.merge

"""Merging utilities."""

# Authors: Pierre Ablin
#
# License: MIT

import numpy as np

from abc import abstractmethod

from sklearn.base import TransformerMixin
from sklearn.utils.validation import check_is_fitted

from ..utils.utils import check_Xs


class BaseMerger(TransformerMixin):
    """A base class for merging multiview datasets into single view datasets.

    The .transform function should return a single dataset.

    Parameters
    ----------
    Attributes
    ----------
    See Also
    --------
    """

    def __init__(self):
        pass  # pragma: no cover

    @abstractmethod
    def fit(self, Xs, y=None):
        r"""Fit model to multiview data.

        Parameters
        ----------
        Xs: list of array-likes
            - Xs shape: (n_views,)
            - Xs[i] shape: (n_samples, n_features_i)
        y : array, shape (n_samples,), optional

        Returns
        -------
        self: returns an instance of self.
        """

        return self  # pragma: no cover

    @abstractmethod
    def transform(self, Xs, y=None):
        r"""Merge multiview data into a single dataset

        Parameters
        ----------
        Xs: list of array-likes
            - Xs shape: (n_views,)
            - Xs[i] shape: (n_samples, n_features_i)
        y : array, shape (n_samples,), optional

        Returns
        -------
        X_transformed : numpy.ndarray of shape (n_samples, n_features)
             The singleview output
        """
        pass  # pragma: no cover

    def fit_transform(self, Xs, y=None):
        r"""Fit  to the data and merge

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
        y : array, shape (n_samples,), optional

        Returns
        -------
        X_transformed : numpy.ndarray of shape (n_samples, n_features)
             The singleview output
        """
        return self.fit(Xs, y).transform(Xs)

    @abstractmethod
    def inverse_transform(self, X):
        r"""Take a single view dataset and split it into multiple views.

        Parameters
        ----------
        X : numpy.ndarray, shape (n_total_features, n_samples)
            The input dataset

        Returns
        -------
        Xs : list of numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
        """

        pass  # pragma: no cover


[docs]class ConcatMerger(BaseMerger): r"""A transformer that stacks features of multiview datasets. Take a multiview dataset and transform it in a single view dataset by stacking features. Attributes ---------- n_features_ : list of ints The number of features in each view of the input dataset n_total_features_ : int The number of features in the dataset, equal to the sum of n_features_ n_views_ : int The number of views in the dataset See Also -------- AverageMerger """ def __init__(self): pass def fit(self, Xs, y=None): r"""Fit to the data. Stores the number of features in each view Parameters ---------- Xs : list of array-likes or numpy.ndarray - Xs length: n_views - Xs[i] shape: (n_samples, n_features_i) y Ignored Returns ------- self : object Transformer instance. """ Xs, n_views, n_samples, n_features = check_Xs( Xs, return_dimensions=True ) self.n_features_ = n_features self.n_total_features_ = sum(self.n_features_) self.n_views_ = n_views return self def transform(self, Xs, y=None): r"""Merge the data by stacking its features. The multiple views are transformed into a single view dataset by stacking (i.e. concatenating) the features. Parameters ---------- Xs : list of array-likes or numpy.ndarray - Xs length: n_views - Xs[i] shape: (n_samples, n_features_i) y Ignored Returns ------- X_transformed : numpy.ndarray of shape (n_total_features, n_samples) The stacked data, containing all the stacked features. """ Xs = check_Xs(Xs) return np.hstack(Xs) def inverse_transform(self, X): r"""Take a single view dataset and split it into multiple views. The input dimension must match the fitted dimension of the multiview dataset. Parameters ---------- X : numpy.ndarray, shape (n_total_features, n_samples) The input dataset Returns ------- Xs : list of numpy.ndarray - Xs length: n_views - Xs[i] shape: (n_samples, n_features_i) The multiview dataset obtained by splitting features of X """ check_is_fitted(self) n_feature = X.shape[1] if n_feature != self.n_total_features_: raise ValueError( "The number of features in the input array ({}) does not match" " the total number of features in the multiview dataset" " ({})".format(n_feature, self.n_total_features_) ) return np.split(X, np.cumsum(self.n_features_)[:-1], axis=1)
[docs]class AverageMerger(BaseMerger): r"""A transformer that computes the mean of multiview datasets Take a multiview dataset and transform it in a single view dataset by averaging across views Attributes ---------- n_feature_ : list of ints The number of feature in each view of the input dataset Must be the same for each dataset. n_views_ : int The number of views in the dataset See Also -------- ConcatMerger """ def __init__(self): pass def fit(self, Xs, y=None): r"""Fit to the data. Stores the number of features in each view, and checks that each view has the same number of features. Parameters ---------- Xs : list of array-likes or numpy.ndarray - Xs length: n_views - Xs[i] shape: (n_samples, n_features_i) y Ignored Returns ------- self : object Transformer instance. """ Xs = check_Xs(Xs) n_features_ = [X.shape[1] for X in Xs] if len(set(n_features_)) > 1: raise ValueError( "The number of features in each dataset should be the same." ) self.n_feature_ = n_features_[0] self.n_views_ = len(n_features_) return self def transform(self, Xs, y=None): r"""Merge the views by averaging Transform the multiview dataset into a single view by averaging the views Parameters ---------- Xs : list of array-likes or numpy.ndarray - Xs length: n_views - Xs[i] shape: (n_samples, n_features_i) y Ignored Returns ------- X_transformed : numpy.ndarray of shape (n_total_features, n_samples) The average of the views. """ Xs = check_Xs(Xs) return np.mean(Xs, axis=0)