Source code for mne_rsa.rdm

# encoding: utf-8
"""Methods to compute dissimilarity matrices (RDMs)."""

import numpy as np
from joblib import Parallel, delayed
from scipy.spatial import distance

from .folds import create_folds
from .searchlight import searchlight


[docs] def compute_rdm(data, metric="correlation", **kwargs): """Compute a dissimilarity matrix (RDM). Parameters ---------- data : ndarray, shape (n_items, ...) For each item, all the features. The first dimension are the items and all other dimensions will be flattened and treated as features. metric : str | function The distance metric to use to compute the RDM. Can be any metric supported by :func:`scipy.spatial.distance.pdist`. When a function is specified, it needs to take in two vectors and output a single number. See also the ``dist_params`` parameter to specify and additional parameter for the distance function. Defaults to 'correlation'. **kwargs : dict, optional Extra arguments for the distance metric. Refer to :mod:`scipy.spatial.distance` for a list of all other metrics and their arguments. Returns ------- rdm : ndarray, shape (n_classes * n_classes-1,) The RDM, in condensed form. See :func:`scipy.spatial.distance.squareform`. See Also -------- compute_rdm_cv """ X = np.reshape(np.asarray(data), (len(data), -1)) n_items, n_features = X.shape # Be careful with certain metrics if n_features == 1 and metric in ["correlation", "cosine"]: raise ValueError( "There is only a single feature, so " "'correlation' and 'cosine' can not be " "used as RDM metric. Consider using 'sqeuclidean' " "instead." ) return distance.pdist(X, metric, **kwargs)
[docs] def compute_rdm_cv(folds, metric="correlation", **kwargs): """Compute a dissimilarity matrix (RDM) using cross-validation. The distance computation is performed from the average of all-but-one "training" folds to the remaining "test" fold. This is repeated with each fold acting as the "test" fold once. The resulting distances are averaged and the result used in the final RDM. Parameters ---------- folds : ndarray, shape (n_folds, n_items, ...) For each item, all the features. The first dimension are the folds used for cross-validation, items are along the second dimension, and all other dimensions will be flattened and treated as features. metric : str | function The distance metric to use to compute the RDM. Can be any metric supported by :func:`scipy.spatial.distance.pdist`. When a function is specified, it needs to take in two vectors and output a single number. See also the ``dist_params`` parameter to specify and additional parameter for the distance function. Defaults to 'correlation'. **kwargs : dict, optional Extra arguments for the distance metric. Refer to :mod:`scipy.spatial.distance` for a list of all other metrics and their arguments. Returns ------- rdm : ndarray, shape (n_classes * n_classes-1,) The cross-validated RDM, in condensed form. See :func:`scipy.spatial.distance.squareform`. See Also -------- compute_rdm """ X = np.reshape(folds, (folds.shape[0], folds.shape[1], -1)) n_folds, n_items, n_features = X.shape[:3] # Be careful with certain metrics if n_features == 1 and metric in ["correlation", "cosine"]: raise ValueError( "There is only a single feature, so " "'correlataion' and 'cosine' can not be " "used as RDM metric. Consider using 'sqeuclidean' " "instead." ) rdm = np.zeros((n_items * (n_items - 1)) // 2) X_mean = X.mean(axis=0) # Do cross-validation for test_fold in range(n_folds): X_test = X[test_fold] X_train = X_mean - (X_mean - X_test) / (n_folds - 1) dist = distance.cdist(X_train, X_test, metric, **kwargs) rdm += dist[np.triu_indices_from(dist, 1)] return rdm / n_folds
def _ensure_condensed(rdm, var_name): """Convert a RDM to condensed form if needed.""" if isinstance(rdm, list): return [_ensure_condensed(d, var_name) for d in rdm] if not isinstance(rdm, np.ndarray): raise TypeError( "A single RDM should be a NumPy array. " "Multiple RDMs should be a list of NumPy arrays." ) if rdm.ndim == 2: if rdm.shape[0] != rdm.shape[1]: raise ValueError( f'Invalid dimensions for "{var_name}" ' "({rdm.shape}). The RDM should either be a " "square matrix, or a one dimensional array when " "in condensed form." ) rdm = distance.squareform(rdm) elif rdm.ndim != 1: raise ValueError( f'Invalid dimensions for "{var_name}" ({rdm.shape}). ' "The RDM should either be a square matrix, or a one " "dimensional array when in condensed form." ) return rdm def _n_items_from_rdm(rdm): """Get the number of items, given a RDM.""" if rdm.ndim == 2: return rdm.shape[0] elif rdm.ndim == 1: return distance.squareform(rdm).shape[0] else: raise ValueError(f"Wrong number of dimensions for RDM ({rdm.ndim})") def pick_rdm(rdm, sel): """Select items from an RDM. Parameters ---------- rdm : ndarray, shape (n, n) | (n * (n - 1) // 2,) The RDM to pick items from. sel : int | list of int | boolean mask | slice The items to pick. These items will be selected from both rows and columns of the RDM. Returns ------- rdm : ndarray, shape (n, n) | (n * (n - 1) // 2,) A new RDM with only the selected items. If the original RDM was in condensed form, the returned RDM will be as well. """ if np.isscalar(sel): sel = [sel] # to avoid dropping a dimension if rdm.ndim == 2: return rdm[sel, :][:, sel] elif rdm.ndim == 1: return distance.squareform(distance.squareform(rdm)[sel, :][:, sel]) else: raise ValueError(f"Wrong number of dimensions for RDM ({rdm.ndim})")
[docs] class rdm_array: """Generate RDMs from an array of data, possibly in a searchlight pattern. First use :class:`searchlight` to compute the searchlight patches. Then you can use this function to compute RDMs for each searchlight patch. Parameters ---------- X : ndarray, shape (n_items, n_series, n_times) An array containing the data. patches : generator of tuples | None Searchlight patches as generated by :class:`searchlight`. If ``None``, no searchlight is used. Defaults to ``None``. dist_metric : str | function The distance metric to use to compute the RDMs. Can be any metric supported by :func:`scipy.spatial.distance.pdist`. When a function is specified, it needs to take in two vectors and output a single number. See also the ``dist_params`` parameter to specify and additional parameter for the distance function. Defaults to 'correlation'. dist_params : dict Extra arguments for the distance metric used to compute the RDMs. Refer to :mod:`scipy.spatial.distance` for a list of all other metrics and their arguments. Defaults to an empty dictionary. y : ndarray of int, shape (n_items,) | None For each item, a number indicating the class to which the item belongs. When ``None``, each item is assumed to belong to a different class. Defaults to ``None``. n_folds : int | sklearn.model_selection.BaseCrollValidator | None Number of cross-validation folds to use when computing the distance metric. Folds are created based on the ``y`` parameter. Specify ``None`` to use the maximum number of folds possible, given the data. Alternatively, you can pass a Scikit-Learn cross validator object (e.g. ``sklearn.model_selection.KFold``) to assert fine-grained control over how folds are created. Defaults to 1 (no cross-validation). Yields ------ rdm : ndarray, shape (n_patches, n_items * (n_items - 1)) A RDM (in condensed form) for each searchlight patch. Attributes ---------- shape : tuple of int Multidimensional shape of the generted RDMs. This is useful for re-shaping the result obtained after consuming the this generator. For a spatio-temporal searchlight: Three elements: the number of time-series, number of time samples and length of a consensed RDM. For a spatial searchlight: Two element: the number of time-series and length of a condensed RDM. For a temporal searchlight: Two elements: the number of time-samples and length of a condensed RDM. For no searchlight: One element: the length of a condensed RDM. See Also -------- rdm rsa searchlight """ def __init__( self, X, patches=None, dist_metric="correlation", dist_params=dict(), y=None, n_folds=1, n_jobs=1, ): if patches is None: patches = searchlight(X.shape) # Create folds for cross-validated RDM metrics self.X = create_folds(X, y, n_folds) # The data is now folds x items x n_series x ... self.patches = patches self.dist_metric = dist_metric self.dist_params = dist_params self.use_cv = len(self.X) > 1 # More than one fold present # Target shape for an array that would hold all of the generated RDMs. rdm_length = len(np.triu_indices(self.X.shape[1], k=1)[0]) self.shape = patches.shape + (rdm_length,) self.n_jobs = n_jobs # Setup the generator that will be producing the RDMs self._generator = iter(self)
[docs] def __iter__(self): """Build a new iterator that starts from the beginning.""" return self._iter_rdms()
def __next__(self): """Generate a RDM for each patch.""" return next(self._generator) def _iter_rdms(self): par = Parallel(n_jobs=self.n_jobs, return_as="generator") if self.use_cv: yield from par( delayed(compute_rdm_cv)( self.X[(slice(None),) + patch], self.dist_metric, **self.dist_params ) for patch in self.patches ) else: yield from par( delayed(compute_rdm)( self.X[0][patch], self.dist_metric, **self.dist_params ) for patch in self.patches )
[docs] def __len__(self): """Get total number of RDMs that will be generated.""" return len(self.patches)