"""Functions to perform statistical significance testing."""
from typing import Dict, Optional, Tuple
import numpy as np
from osl_dynamics.glm import DesignConfig, MaxStatPermutation
[docs]
def evoked_response_max_stat_perm(
data: np.ndarray,
n_perm: int,
covariates: Optional[Dict[str, np.ndarray]] = None,
metric: str = "copes",
n_jobs: int = 1,
) -> np.ndarray:
r"""Statistical significance testing for evoked responses.
This function fits a General Linear Model (GLM) with ordinary least
squares and performs a sign-flip permutation to build a null distribution
using maximum statistic across the target dimensions.
Parameters
----------
data : np.ndarray
Data array for baseline corrected evoked responses.
Shape is (n_samples, \*target_dims).
n_perm : int
Number of permutations.
covariates : dict, optional
Dictionary of continuous covariates to include as confound regressors.
Each key is a covariate name and each value is a 1D array of length
n_samples. These are z-scored internally. Example::
covariates = {
"age": np.array([25, 30, 28, ...]),
"head_size": np.array([55.1, 57.3, 54.8, ...]),
}
metric : str, optional
Statistic to compute p-values.
Options are :code:`'copes'` or :code:`'tstats'`.
n_jobs : int, optional
Number of jobs to run in parallel.
Returns
-------
pvalues : np.ndarray
P-values. Shape is (\*target_dims,).
"""
features = [
{"name": "Mean", "values": np.ones(data.shape[0]), "feature_type": "constant"}
]
if covariates is None:
covariates = {}
for key, value in covariates.items():
features.append({"name": key, "values": value, "feature_type": "continuous"})
contrasts = [{"name": "Mean", "values": [1] + [0] * len(covariates)}]
DC = DesignConfig(features=features, contrasts=contrasts)
design = DC.create_design()
perm = MaxStatPermutation(
design,
contrast_indx=0,
n_perm=n_perm,
n_jobs=n_jobs,
)
perm.fit(data)
pvalues = perm.get_pvalues(metric=metric)
return pvalues
[docs]
def group_diff_max_stat_perm(
data: np.ndarray,
assignments: np.ndarray,
n_perm: int,
covariates: Optional[Dict[str, np.ndarray]] = None,
metric: str = "tstats",
n_jobs: int = 1,
) -> Tuple[np.ndarray, np.ndarray]:
r"""Statistical significance testing for difference between two groups.
This function fits a General Linear Model (GLM) with ordinary least
squares and performs a row-shuffle permutation to build a null distribution
using maximum statistic across the target dimensions (target_dims)
for the difference between two groups.
Parameters
----------
data : np.ndarray
Data array for baseline corrected evoked responses.
Shape is (n_samples, \*target_dims).
assignments : np.ndarray
Group assignments. Shape is (n_samples,).
Must have exactly two unique values.
n_perm : int
Number of permutations.
covariates : dict, optional
Dictionary of continuous covariates to include as confound regressors.
Each key is a covariate name and each value is a 1D array of length
n_samples. These are z-scored internally. Example::
covariates = {
"age": np.array([25, 30, 28, ...]),
"head_size": np.array([55.1, 57.3, 54.8, ...]),
}
metric : str, optional
Statistic to compute p-values with.
Options are :code:`'copes'` or :code:`'tstats'`.
n_jobs : int, optional
Number of jobs to run in parallel.
Returns
-------
group_diff : np.ndarray
Difference between two groups. Shape is (\*target_dims,).
pvalues : np.ndarray
P-values. Shape is (\*target_dims,).
"""
if covariates is None:
covariates = {}
unique_groups = np.unique(assignments)
if len(unique_groups) != 2:
raise ValueError("assignments must have exactly two unique values.")
features = [
{
"name": "Group1",
"values": (assignments == unique_groups[0]).astype(int),
"feature_type": "categorical",
},
{
"name": "Group2",
"values": (assignments == unique_groups[1]).astype(int),
"feature_type": "categorical",
},
]
for key, value in covariates.items():
features.append({"name": key, "values": value, "feature_type": "continuous"})
contrasts = [{"name": "GroupDiff", "values": [1, -1] + [0] * len(covariates)}]
DC = DesignConfig(features=features, contrasts=contrasts)
design = DC.create_design()
perm = MaxStatPermutation(
design,
contrast_indx=0,
n_perm=n_perm,
n_jobs=n_jobs,
)
perm.fit(data)
group_diff = perm.copes
pvalues = perm.get_pvalues(metric=metric)
return group_diff, pvalues
[docs]
def paired_diff_max_stat_perm(
data: np.ndarray,
n_perm: int,
metric: str = "copes",
n_jobs: int = 1,
) -> Tuple[np.ndarray, np.ndarray]:
r"""Statistical significance testing for paired difference.
This function fits a General Linear Model (GLM) with ordinary least
squares and performs sign-flip permutations to build a null distribution
with the maximum statistic across the target dimensions for the paired
differences.
Parameters
----------
data : np.ndarray
Data array for baseline corrected evoked responses.
Shape is (n_samples, \*target_dims).
n_perm : int
Number of permutations.
metric : str, optional
Statistic to compute p-values with.
Options are :code:`'copes'` or :code:`'tstats'`.
n_jobs : int, optional
Number of jobs to run in parallel.
Returns
-------
paired_diff : np.ndarray
Paired differences. Shape is (\*target_dims,).
pvalues : np.ndarray
P-values. Shape is (\*target_dims,).
"""
features = [
{"name": "Mean", "values": np.ones(data.shape[0]), "feature_type": "constant"}
]
contrasts = [{"name": "Mean", "values": [1]}]
DC = DesignConfig(features=features, contrasts=contrasts)
design = DC.create_design()
perm = MaxStatPermutation(
design,
contrast_indx=0,
n_perm=n_perm,
n_jobs=n_jobs,
)
perm.fit(data)
pvalues = perm.get_pvalues(metric=metric)
paired_diff = perm.copes
return paired_diff, pvalues