# Source code for mvlearn.utils.utils

# License: MIT

import numpy as np
from sklearn.utils import check_X_y, check_array


def check_Xs(
    Xs,
    multiview=False,
    enforce_views=None,
    copy=False,
    return_dimensions=False,
):
    r"""
    Checks Xs and ensures it to be a list of 2D matrices.

    Parameters
    ----------
    Xs : nd-array, list
        Input data. A list is treated as one entry per view. A 2D array
        is treated as a single view. Any other array is split along its
        first axis, one view per slice.

    multiview : boolean, (default=False)
        If True, throws error if just 1 data matrix given.

    enforce_views : int, (default=not checked)
        If provided, ensures this number of views in Xs. Otherwise not
        checked.

    copy : boolean, (default=False)
        Forwarded to ``sklearn.utils.check_array`` for every view. If
        True, each returned view is a copy of the corresponding input.
        If False, no copy is forced, but ``check_array`` may still copy
        when the input has to be converted.

    return_dimensions : boolean, (default=False)
        If True, the function also returns the dimensions of the multiview
        dataset. The dimensions are n_views, n_samples, n_features where
        n_views and n_samples are respectively the number of views and the
        number of samples, and n_features is a list of length n_views
        containing the number of features of each view.

    Returns
    -------
    Xs_converted : object
        The converted and validated Xs (list of data arrays).

    n_views : int
        The number of views in the dataset. Returned only if
        ``return_dimensions`` is ``True``.

    n_samples : int
        The number of samples in the dataset. Returned only if
        ``return_dimensions`` is ``True``.

    n_features : list
        List of length ``n_views`` containing the number of features in
        each view. Returned only if ``return_dimensions`` is ``True``.

    Raises
    ------
    ValueError
        If Xs is neither a list nor an ndarray, is empty, has the wrong
        number of views, or its views differ in number of samples.
    """
    if not isinstance(Xs, list):
        if not isinstance(Xs, np.ndarray):
            # Adjacent literals instead of a backslash continuation, which
            # would embed the source indentation in the message.
            msg = (
                "If not list, input must be of type np.ndarray, "
                f"not {type(Xs)}"
            )
            raise ValueError(msg)
        # A lone 2D array is one view; otherwise the first axis indexes
        # the views.
        Xs = [Xs] if Xs.ndim == 2 else list(Xs)

    n_views = len(Xs)
    if n_views == 0:
        raise ValueError("Length of input list must be greater than 0")

    if multiview and n_views == 1:
        raise ValueError("Must provide at least two data matrices")

    if enforce_views is not None and n_views != enforce_views:
        msg = "Wrong number of views. Expected {} but found {}".format(
            enforce_views, n_views
        )
        raise ValueError(msg)

    # Per-view validation (2D, finite, numeric) is delegated to sklearn.
    Xs = [check_array(X, allow_nd=False, copy=copy) for X in Xs]

    if len({X.shape[0] for X in Xs}) != 1:
        raise ValueError("All views must have the same number of samples")

    if return_dimensions:
        n_samples = Xs[0].shape[0]
        n_features = [X.shape[1] for X in Xs]
        return Xs, n_views, n_samples, n_features
    return Xs
def check_Xs_y(
    Xs, y, multiview=False, enforce_views=None, return_dimensions=False
):
    r"""
    Validates a multiview dataset together with its labels.

    Xs is validated with ``check_Xs``; y is then validated against the
    first view with ``sklearn.utils.check_X_y``, which checks that both
    have a consistent length.

    Parameters
    ----------
    Xs : nd-array, list
        Input data.

    y : nd-array, list
        Labels.

    multiview : boolean, (default=False)
        If True, throws error if just 1 data matrix given.

    enforce_views : int, (default=not checked)
        If provided, ensures this number of views in Xs. Otherwise not
        checked.

    return_dimensions : boolean, (default=False)
        If True, the function also returns the dimensions of the multiview
        dataset: n_views (number of views), n_samples (number of samples)
        and n_features (list of length n_views with the number of features
        of each view).

    Returns
    -------
    Xs_converted : object
        The converted and validated Xs (list of data arrays).

    y_converted : object
        The converted and validated y.

    n_views : int
        The number of views in the dataset. Returned only if
        ``return_dimensions`` is ``True``.

    n_samples : int
        The number of samples in the dataset. Returned only if
        ``return_dimensions`` is ``True``.

    n_features : list
        List of length ``n_views`` containing the number of features in
        each view. Returned only if ``return_dimensions`` is ``True``.
    """
    checked = check_Xs(
        Xs,
        multiview=multiview,
        enforce_views=enforce_views,
        return_dimensions=return_dimensions,
    )
    # check_Xs returns a 4-tuple only when dimensions were requested.
    if return_dimensions:
        Xs_converted, n_views, n_samples, n_features = checked
    else:
        Xs_converted = checked

    # All views share the same number of samples, so checking y against
    # the first view is sufficient.
    y_converted = check_X_y(Xs_converted[0], y, allow_nd=False)[1]

    if not return_dimensions:
        return Xs_converted, y_converted
    return Xs_converted, y_converted, n_views, n_samples, n_features
def check_Xs_y_nan_allowed(
    Xs,
    y,
    multiview=False,
    enforce_views=None,
    num_classes=None,
    max_classes=None,
    min_classes=None,
):
    r"""
    Checks Xs and y for consistent length. The labels (y) are allowed to
    be np.nan.

    Xs is validated with ``check_Xs``. y is converted with ``np.array``
    and only its length is compared with the number of samples, so NaN
    labels are accepted. NaN labels are ignored when counting classes.

    Parameters
    ----------
    Xs : nd-array, list
        Input data.

    y : nd-array, list
        Labels. Must be numeric if any of ``num_classes``,
        ``max_classes`` or ``min_classes`` is given, because NaN entries
        are located with ``np.isnan``.

    multiview : boolean, default=False
        If True, throws error if just 1 data matrix given.

    enforce_views : int, (default=not checked)
        If provided, ensures this number of views in Xs. Otherwise not
        checked.

    num_classes : int, default=None
        Number of classes that must appear in the labels. If None, then
        not checked.

    max_classes : int, default=None
        Maximum number of classes that may appear in labels. If None,
        then not checked.

    min_classes : int, default=None
        Minimum number of classes that must appear in labels. If None,
        then not checked.

    Returns
    -------
    Xs_converted : object
        The converted and validated Xs (list of data arrays).

    y_converted : object
        The converted and validated y.

    Raises
    ------
    ValueError
        If the length of y differs from the number of samples, or the
        number of distinct non-NaN labels violates ``num_classes``,
        ``max_classes`` or ``min_classes``.
    """
    Xs_converted = check_Xs(
        Xs, multiview=multiview, enforce_views=enforce_views
    )
    y_converted = np.array(y)

    n_samples = Xs_converted[0].shape[0]
    if len(y_converted) != n_samples:
        msg = "Incompatible label length {} for data with {} samples".format(
            len(y_converted), n_samples
        )
        raise ValueError(msg)

    # Without class constraints the labels are not inspected, so
    # non-numeric labels remain acceptable in that case.
    if num_classes is None and max_classes is None and min_classes is None:
        return Xs_converted, y_converted

    # Count on the converted array: boolean-mask indexing on a plain list
    # would raise TypeError. NaN marks an unlabeled sample, not a class.
    labeled = y_converted[~np.isnan(y_converted)]
    n_classes = len(np.unique(labeled))

    if num_classes is not None and n_classes != num_classes:
        raise ValueError(
            "Wrong number of class labels. Expected {}, found {}".format(
                num_classes, n_classes
            )
        )

    if max_classes is not None and n_classes > max_classes:
        raise ValueError(
            "Wrong number of class labels. Expected no more than {}, "
            "found {}".format(max_classes, n_classes)
        )

    if min_classes is not None and n_classes < min_classes:
        raise ValueError(
            "Wrong number of class labels. Expected no fewer than {}, "
            "found {}".format(min_classes, n_classes)
        )

    return Xs_converted, y_converted
def param_as_list(param, n_views, single_None=False):
    """
    Returns a parameter as a list with one entry per view.

    Parameters
    ----------
    param : object
        The parameter value. A list is taken to already hold one entry
        per view; any other value is repeated for every view.

    n_views : int
        The number of views, i.e. the required length of the result.

    single_None : boolean, (default=False)
        If True, a ``param`` of None is returned unchanged instead of
        being expanded to a list of None.

    Returns
    -------
    param : list or None
        The input list itself if ``param`` is a list, None if ``param``
        is None and ``single_None`` is True, otherwise a new list holding
        ``param`` repeated ``n_views`` times.

    Raises
    ------
    AssertionError
        If ``param`` is a list whose length is not ``n_views``.
    """
    if isinstance(param, list):
        # Already per-view: only the length needs checking.
        assert len(param) == n_views, \
            f"params {param} must be of length n_views={n_views}"
        return param

    if single_None and param is None:
        return param

    # Every slot references the same object.
    return [param] * n_views