Source code for mvlearn.decomposition.groupica

"""Group Independent Component Analysis."""

# Authors: Pierre Ablin, Hugo Richard
#
# License: MIT

import numpy as np
from scipy import linalg
from sklearn.decomposition import fastica
from sklearn.utils.validation import check_is_fitted

try:
    from picard import picard
except ModuleNotFoundError as error:
    msg = (f"ModuleNotFoundError: {error}. multiviewica dependencies" +
           "required for this function. Please consult the mvlearn" +
           "installation instructions at https://github.com/mvlearn/mvlearn" +
           "to correctly install multiviewica dependencies.")
    raise ModuleNotFoundError(msg)

from ..utils.utils import check_Xs
from .base import BaseDecomposer
from .grouppca import GroupPCA


class GroupICA(BaseDecomposer):
    r"""Group Independent component analysis.

    Each dataset in `Xs` is reduced with usual PCA (this step is optional).
    Then, datasets are concatenated in the features direction, and a PCA is
    performed on this matrix, yielding a single dataset. ICA is finally
    performed, yielding the output dataset S. The unmixing matrix W
    corresponding to data X is obtained by solving
    argmin_{W} ||S - WX||^2.

    Parameters
    ----------
    n_components : int, optional
        Number of components to extract. If None, n_components is set to
        the minimum number of features in the dataset.

    n_individual_components : int or list of int or 'auto', optional
        The number of individual components to extract as a preprocessing.
        If None, no preprocessing is applied. If an `int`, each dataset
        is reduced to this dimension. If a list, the dataset `i` is
        reduced to the dimension `n_individual_components[i]`.
        If `'auto'`, set to the minimum between n_components and the
        smallest number of features in each dataset.

    multiview_output : bool, optional (default True)
        If True, the `.transform` method returns one dataset per view.
        Otherwise, it returns one dataset, of shape
        (n_samples, n_components).

    prewhiten : bool, optional (default False)
        Whether the data should be whitened after the original
        preprocessing.

    solver : str {'picard', 'fastica'}
        Chooses which ICA solver to use. `picard` is generally faster and
        more reliable.

    ica_kwargs : dict
        Optional keyword arguments for the ICA solver. If solver='fastica',
        see the documentation of sklearn.decomposition.fastica.
        If solver='picard', see the documentation of picard.picard.

    random_state : int, RandomState instance, default=None
        Controls the random number generator used in the estimator. Pass an
        int for reproducible output across multiple function calls.

    Attributes
    ----------
    means_ : list of arrays of shape (n_components,)
        The mean of each dataset.

    grouppca_ : mvlearn.decomposition.GroupPCA instance
        A GroupPCA class for preprocessing and dimension reduction.

    mixing_ : array, shape (n_components, n_components)
        The square mixing matrix, linking the output of the group-pca
        and the independent components.

    components_ : array, shape (n_components, n_components)
        The square unmixing matrix.

    individual_components_ : list of array
        Individual unmixing matrices estimated by least squares.
        `individual_components_[i]` is an array of shape
        (n_components, n_features) where n_features is the number of
        features in the dataset `i`.

    individual_mixing_ : list of array
        Individual mixing matrices estimated by least squares.
        `individual_mixing_[i]` is an array of shape
        (n_features, n_components) where n_features is the number of
        features in the dataset `i`.

    n_components_ : int
        The estimated number of components.

    n_features_ : list of int
        Number of features in each training dataset.

    n_samples_ : int
        Number of samples in the training data.

    n_views_ : int
        Number of views in the training data.

    See also
    --------
    GroupPCA
    multiviewica

    References
    ----------
    .. [#1groupica] Vince D. Calhoun, et al. "A method for making group
            inferences from functional MRI data using independent component
            analysis." Human Brain Mapping, 14(3):140-151, 2001.
    Examples
    --------
    >>> from mvlearn.datasets import load_UCImultifeature
    >>> from mvlearn.decomposition import GroupICA
    >>> Xs, _ = load_UCImultifeature()
    >>> ica = GroupICA(n_components=3)
    >>> Xs_transformed = ica.fit_transform(Xs)
    >>> print(len(Xs_transformed))
    6
    >>> print(Xs_transformed[0].shape)
    (2000, 3)
    """

    def __init__(
        self,
        n_components=None,
        n_individual_components="auto",
        multiview_output=True,
        prewhiten=False,
        solver="picard",
        ica_kwargs={},
        random_state=None,
    ):
        if solver not in ["picard", "fastica"]:
            raise ValueError(
                "Invalid solver, must be either `fastica` or `picard`"
            )
        self.n_components = n_components
        self.n_individual_components = n_individual_components
        self.multiview_output = multiview_output
        self.prewhiten = prewhiten
        self.solver = solver
        self.ica_kwargs = ica_kwargs
        self.random_state = random_state

    def fit(self, Xs, y=None):
        r"""Fit to the data.

        Estimate the parameters of the model.

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)

        y : None
            Ignored variable.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        Xs = check_Xs(Xs, copy=True)
        self.means_ = [np.mean(X, axis=0) for X in Xs]
        gpca = GroupPCA(
            n_components=self.n_components,
            n_individual_components=self.n_individual_components,
            prewhiten=self.prewhiten,
            whiten=True,
            multiview_output=False,
            random_state=self.random_state,
        )
        X_pca = gpca.fit_transform(Xs)
        self.grouppca_ = gpca
        if self.solver == "fastica":
            K, W, sources = fastica(
                X_pca, **self.ica_kwargs, random_state=self.random_state
            )
        else:
            K, W, sources = picard(
                X_pca.T, **self.ica_kwargs, random_state=self.random_state
            )
            sources = sources.T
        if K is not None:
            self.components_ = np.dot(W, K)
        else:
            self.components_ = W
        self.mixing_ = linalg.pinv(self.components_)
        # Compute individual unmixing matrices by least-squares
        self.individual_mixing_ = []
        self.individual_components_ = []
        sources_pinv = linalg.pinv(sources)
        for X, mean in zip(Xs, self.means_):
            lstq_solution = np.dot(sources_pinv, X - mean)
            self.individual_components_.append(linalg.pinv(lstq_solution).T)
            self.individual_mixing_.append(lstq_solution.T)
        self.n_components_ = gpca.n_components_
        self.n_features_ = gpca.n_features_
        self.n_samples_ = gpca.n_samples_
        self.n_views_ = gpca.n_views_
        return self

    def transform(self, Xs, y=None, index=None):
        r"""Transform the data Xs into sources.

        Parameters
        ----------
        Xs : list of array-likes
            - Xs shape: (n_views,)
            - Xs[i] shape: (n_samples, n_features_i)

        y : None
            Ignored variable.

        index : int or array-like, default=None
            The index or list of indices of the fitted views to which the
            inputted views correspond. If None, there should be as many
            inputted views as the fitted views and in the same order.
            Note that the index parameter is not available in all methods
            of mvlearn yet.

        Returns
        -------
        X_transformed : list of array-likes or numpy.ndarray
            The transformed data. If `multiview_output` is True, it is a
            list with the estimated individual sources. If
            `multiview_output` is False, it is a single array containing
            the shared sources.
""" check_is_fitted(self) Xs = check_Xs(Xs, copy=True) if index is None: index_ = np.arange(self.n_views_) else: index_ = np.copy(index) index_ = np.atleast_1d(index_) assert len(index_) == len(Xs) if self.multiview_output: return [ np.dot(X - mean, W.T) for W, X, mean in ( zip( [self.individual_components_[i] for i in index_], Xs, [self.means_[i] for i in index_], ) ) ] else: X = self.grouppca_.transform(Xs, index=index) return np.dot(X, self.components_.T) def inverse_transform(self, X_transformed, index=None): r"""Recover multiview data from transformed data. Parameters ---------- X_transformed : list of array-likes or numpy.ndarray If `multiview_output` is True, it must be a list of arrays of shape (n_samples, n_components) containing estimated sources. If `multiview_output` is False, it must be a single array containing shared sources. index: int or array-like, default=None The index or list of indices of the fitted views to which the inputted views correspond. If None, there should be as many inputted views as the fitted views and in the same order. Note that the index parameter is not available in all methods of mvlearn yet. Returns ------- Xs : list of arrays The recovered individual datasets. """ check_is_fitted(self) if index is None: index_ = np.arange(self.n_views_) else: index_ = np.copy(index) index_ = np.atleast_1d(index_) if self.multiview_output: X_transformed = check_Xs(X_transformed) assert len(X_transformed) == len(index_) return [ np.dot(X, A.T) + mean for X, A, mean in ( zip( X_transformed, [self.individual_mixing_[i] for i in index_], [self.means_[i] for i in index_], ) ) ] else: return self.grouppca_.inverse_transform( np.dot(X_transformed, self.mixing_.T), index=index )