Source code for mvlearn.compose.split

"""Splitting utilities."""

# Authors: Pierre Ablin
#
# License: MIT

import numpy as np

from sklearn.base import TransformerMixin
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted

from ..utils.utils import check_Xs


class SimpleSplitter(TransformerMixin):
    r"""A transformer that splits the features of a single dataset.

    Take a singleview dataset and transform it into a multiview dataset by
    splitting its features (columns) into consecutive views.

    Parameters
    ----------
    n_features : list of ints
        The number of features to keep in each split: Xs[i] will have shape
        (n_samples, n_features[i])

    Attributes
    ----------
    n_total_features_ : int
        The number of features in the dataset, equal to the sum of
        n_features

    n_views_ : int
        The number of views in the output dataset

    See Also
    --------
    ConcatMerger
    """

    def __init__(self, n_features):
        self.n_features = n_features

    def fit(self, X, y=None):
        r"""Fit to the data.

        Checks that X has a compatible shape.

        Parameters
        ----------
        X : array of shape (n_samples, n_total_features)
            Input dataset

        y
            Ignored

        Returns
        -------
        self : object
            Transformer instance.

        Raises
        ------
        ValueError
            If the number of columns of X differs from the sum of
            n_features.
        """
        X = check_array(X)
        n_total_features = sum(self.n_features)
        if X.shape[1] != n_total_features:
            raise ValueError("The number of features of X should equal the sum"
                             " of n_features")
        # Fitted attributes are only set once validation has succeeded, so a
        # failed fit does not leave the instance looking fitted to
        # check_is_fitted.
        self.n_total_features_ = n_total_features
        self.n_views_ = len(self.n_features)
        return self

    def transform(self, X, y=None):
        r"""Split data.

        Take the singleview dataset and transform it into a multiview
        dataset by splitting its features into different views.

        Parameters
        ----------
        X : array of shape (n_samples, n_total_features)
            Input dataset

        y
            Ignored

        Returns
        -------
        Xs_transformed : list of numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)

        Raises
        ------
        ValueError
            If the number of columns of X differs from the number of
            features seen during fit.
        """
        check_is_fitted(self)
        X = check_array(X)
        # np.split with explicit indices never complains about the total
        # width; without this check a wrongly sized X would silently yield a
        # last view of the wrong width.
        if X.shape[1] != self.n_total_features_:
            raise ValueError("The number of features of X should equal the sum"
                             " of n_features")
        # The cumulative sums (without the last one) are the column indices
        # at which one view ends and the next begins.
        split_points = np.cumsum(self.n_features)[:-1]
        return np.split(X, split_points, axis=1)

    def fit_transform(self, X, y=None):
        r"""Fit to the data and split.

        Parameters
        ----------
        X : array of shape (n_samples, n_total_features)
            Input data

        y : array, shape (n_samples,), optional
            Passed to fit, where it is ignored.

        Returns
        -------
        Xs_transformed : list of numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
        """
        return self.fit(X, y).transform(X)

    def inverse_transform(self, Xs):
        r"""Take a multiview dataset and merge it into a single view.

        The input dimension must match the fitted dimension of the multiview
        dataset.

        Parameters
        ----------
        Xs : list of numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)

            The input multiview dataset

        Returns
        -------
        X : numpy.ndarray, shape (n_samples, n_total_features)
            The output singleview dataset

        Raises
        ------
        ValueError
            If the number of views in Xs, or the number of features of any
            view, does not match n_features.
        """
        check_is_fitted(self)
        # NOTE(review): check_Xs is a project helper; it is assumed to return
        # a sized sequence of 2D arrays - confirm against its definition.
        Xs = check_Xs(Xs)
        # zip() stops at the shorter sequence, so the view count has to be
        # compared explicitly or missing/extra views would go unnoticed.
        if len(Xs) != len(self.n_features):
            raise ValueError("The number of views in Xs does not match"
                             " n_features")
        for X, n_feature in zip(Xs, self.n_features):
            if X.shape[1] != n_feature:
                raise ValueError("The number of features in Xs does not match"
                                 " n_features")
        return np.hstack(Xs)