Source code for osl_dynamics.analysis.statistics

"""Functions to perform statistical significance testing."""

from typing import Dict, Optional, Tuple

import numpy as np

from osl_dynamics.glm import DesignConfig, MaxStatPermutation


[docs] def evoked_response_max_stat_perm( data: np.ndarray, n_perm: int, covariates: Optional[Dict[str, np.ndarray]] = None, metric: str = "copes", n_jobs: int = 1, ) -> np.ndarray: r"""Statistical significance testing for evoked responses. This function fits a General Linear Model (GLM) with ordinary least squares and performs a sign-flip permutation to build a null distribution using maximum statistic across the target dimensions. Parameters ---------- data : np.ndarray Data array for baseline corrected evoked responses. Shape is (n_samples, \*target_dims). n_perm : int Number of permutations. covariates : dict, optional Dictionary of continuous covariates to include as confound regressors. Each key is a covariate name and each value is a 1D array of length n_samples. These are z-scored internally. Example:: covariates = { "age": np.array([25, 30, 28, ...]), "head_size": np.array([55.1, 57.3, 54.8, ...]), } metric : str, optional Statistic to compute p-values. Options are :code:`'copes'` or :code:`'tstats'`. n_jobs : int, optional Number of jobs to run in parallel. Returns ------- pvalues : np.ndarray P-values. Shape is (\*target_dims,). """ features = [ {"name": "Mean", "values": np.ones(data.shape[0]), "feature_type": "constant"} ] if covariates is None: covariates = {} for key, value in covariates.items(): features.append({"name": key, "values": value, "feature_type": "continuous"}) contrasts = [{"name": "Mean", "values": [1] + [0] * len(covariates)}] DC = DesignConfig(features=features, contrasts=contrasts) design = DC.create_design() perm = MaxStatPermutation( design, contrast_indx=0, n_perm=n_perm, n_jobs=n_jobs, ) perm.fit(data) pvalues = perm.get_pvalues(metric=metric) return pvalues
[docs] def group_diff_max_stat_perm( data: np.ndarray, assignments: np.ndarray, n_perm: int, covariates: Optional[Dict[str, np.ndarray]] = None, metric: str = "tstats", n_jobs: int = 1, ) -> Tuple[np.ndarray, np.ndarray]: r"""Statistical significance testing for difference between two groups. This function fits a General Linear Model (GLM) with ordinary least squares and performs a row-shuffle permutation to build a null distribution using maximum statistic across the target dimensions (target_dims) for the difference between two groups. Parameters ---------- data : np.ndarray Data array for baseline corrected evoked responses. Shape is (n_samples, \*target_dims). assignments : np.ndarray Group assignments. Shape is (n_samples,). Must have exactly two unique values. n_perm : int Number of permutations. covariates : dict, optional Dictionary of continuous covariates to include as confound regressors. Each key is a covariate name and each value is a 1D array of length n_samples. These are z-scored internally. Example:: covariates = { "age": np.array([25, 30, 28, ...]), "head_size": np.array([55.1, 57.3, 54.8, ...]), } metric : str, optional Statistic to compute p-values with. Options are :code:`'copes'` or :code:`'tstats'`. n_jobs : int, optional Number of jobs to run in parallel. Returns ------- group_diff : np.ndarray Difference between two groups. Shape is (\*target_dims,). pvalues : np.ndarray P-values. Shape is (\*target_dims,). """ if covariates is None: covariates = {} unique_groups = np.unique(assignments) if len(unique_groups) != 2: raise ValueError("assignments must have exactly two unique values.") features = [ { "name": "Group1", "values": (assignments == unique_groups[0]).astype(int), "feature_type": "categorical", }, { "name": "Group2", "values": (assignments == unique_groups[1]).astype(int), "feature_type": "categorical", }, ] for key, value in covariates.items(): features.append({"name": key, "values": value, "feature_type": "continuous"}) contrasts = [{"name": "GroupDiff", "values": [1, -1] + [0] * len(covariates)}] DC = DesignConfig(features=features, contrasts=contrasts) design = DC.create_design() perm = MaxStatPermutation( design, contrast_indx=0, n_perm=n_perm, n_jobs=n_jobs, ) perm.fit(data) group_diff = perm.copes pvalues = perm.get_pvalues(metric=metric) return group_diff, pvalues
[docs] def paired_diff_max_stat_perm( data: np.ndarray, n_perm: int, metric: str = "copes", n_jobs: int = 1, ) -> Tuple[np.ndarray, np.ndarray]: r"""Statistical significance testing for paired difference. This function fits a General Linear Model (GLM) with ordinary least squares and performs sign-flip permutations to build a null distribution with the maximum statistic across the target dimensions for the paired differences. Parameters ---------- data : np.ndarray Data array for baseline corrected evoked responses. Shape is (n_samples, \*target_dims). n_perm : int Number of permutations. metric : str, optional Statistic to compute p-values with. Options are :code:`'copes'` or :code:`'tstats'`. n_jobs : int, optional Number of jobs to run in parallel. Returns ------- paired_diff : np.ndarray Paired differences. Shape is (\*target_dims,). pvalues : np.ndarray P-values. Shape is (\*target_dims,). """ features = [ {"name": "Mean", "values": np.ones(data.shape[0]), "feature_type": "constant"} ] contrasts = [{"name": "Mean", "values": [1]}] DC = DesignConfig(features=features, contrasts=contrasts) design = DC.create_design() perm = MaxStatPermutation( design, contrast_indx=0, n_perm=n_perm, n_jobs=n_jobs, ) perm.fit(data) pvalues = perm.get_pvalues(metric=metric) paired_diff = perm.copes return paired_diff, pvalues