Source code for mvlearn.compose.rsm

"""Random Subspace Method"""

# Authors: Ronan Perry
#
# License: MIT

import numpy as np
from sklearn.utils import check_array
from sklearn.base import TransformerMixin
from sklearn.utils.validation import check_is_fitted
from .utils import check_n_views, check_n_features


[docs]class RandomSubspaceMethod(TransformerMixin): """ Random Subspace Method [#1RSM]_ for constructing multiple views. Each view is constructed by randomly selecting features from X. Parameters ---------- n_views : int Number of views to construct subspace_dim : int, float Number of features from the data to subsample. If float, is between 0 and 1 and denotes the proportion of features to subsample. random_state : int or RandomState instance, optional (default None) Controls the random sampling of features. Set for reproducible results. Attributes ---------- n_features_ : list, length n_views The number of features in the fitted data subspace_dim_ : int The number of features subsampled in each view subspace_indices_ : list of numpy.ndarray, length n_views Feature indices to subsample for each view References ---------- .. [#1RSM] Tin Kam Ho. "The random subspace method for constructing decision forests." IEEE Transactions on Pattern Analysis and Machine Intelligence, 20(8):832–844, 1998. .. [#2RSM] Dacheng Tao, Xiaoou Tang, Xuelong Li and Xindong Wu, "Asymmetric bagging and random subspace for support vector machines-based relevance feedback in image retrieval," in IEEE Transactions on Pattern Analysis and Machine Intelligence, 28(7):1088-1099, July 2006 Examples -------- >>> from mvlearn.compose import RandomSubspaceMethod >>> import numpy as np >>> X = np.random.rand(1000, 50) >>> rsm = RandomSubspaceMethod(n_views=3, subspace_dim=10) >>> Xs = rsm.fit_transform(Xs) >>> print(len(Xs)) 3 >>> print(Xs[0].shape) (1000, 10) """ def __init__(self, n_views, subspace_dim, random_state=None): check_n_views(n_views) self.n_views = n_views self.subspace_dim = subspace_dim self.random_state = random_state def fit(self, X, y=None): r""" Fit to the singleview data. Parameters ---------- X : array of shape (n_samples, n_total_features) Input dataset y : Ignored Returns ------- self : object The Transformer instance """ X = check_array(X) _, n_features = X.shape self.n_features_ = n_features check_n_features(self.subspace_dim, n_features) # check if n_feaures is between 0 and 1 self.subspace_dim_ = self.subspace_dim if self.subspace_dim_ < 1: self.subspace_dim_ = int(self.subspace_dim_ * n_features) np.random.seed(self.random_state) self.subspace_indices_ = [ np.random.choice( n_features, size=self.subspace_dim_, replace=False) for _ in range(self.n_views)] return self def transform(self, X): r""" Transforms the singleview dataset and into a multiview dataset. Parameters ---------- X : array of shape (n_samples, n_features) Input dataset Returns ------- Xs : list of array-likes or numpy.ndarray - Xs length: n_views - Xs[i] shape: (n_samples, subspace_dim) """ check_is_fitted(self) X = check_array(X) _, n_features = X.shape if n_features != self.n_features_: raise ValueError("Number of features different " + f"than fitted number {self.n_features_}") Xs = [X[:, idxs] for idxs in self.subspace_indices_] return Xs