Source code for osl_dynamics.analysis.statistics

"""Functions to perform statistical significance testing."""

from typing import Dict, Optional, Tuple

import numpy as np

from osl_dynamics.glm import DesignConfig, MaxStatPermutation



[docs]
def evoked_response_max_stat_perm(
    data: np.ndarray,
    n_perm: int,
    covariates: Optional[Dict[str, np.ndarray]] = None,
    metric: str = "copes",
    n_jobs: int = 1,
) -> np.ndarray:
    r"""Statistical significance testing for evoked responses.

    This function fits a General Linear Model (GLM) with ordinary least
    squares and performs a sign-flip permutation to build a null distribution
    using maximum statistic across the target dimensions.

    Parameters
    ----------
    data : np.ndarray
        Data array for baseline corrected evoked responses.
        Shape is (n_samples, \*target_dims).
    n_perm : int
        Number of permutations.
    covariates : dict, optional
        Dictionary of continuous covariates to include as confound regressors.
        Each key is a covariate name and each value is a 1D array of length
        n_samples. These are z-scored internally. Example::

            covariates = {
                "age": np.array([25, 30, 28, ...]),
                "head_size": np.array([55.1, 57.3, 54.8, ...]),
            }

    metric : str, optional
        Statistic to compute p-values.
        Options are :code:`'copes'` or :code:`'tstats'`.
    n_jobs : int, optional
        Number of jobs to run in parallel.

    Returns
    -------
    pvalues : np.ndarray
        P-values. Shape is (\*target_dims,).
    """
    features = [
        {"name": "Mean", "values": np.ones(data.shape[0]), "feature_type": "constant"}
    ]
    if covariates is None:
        covariates = {}

    for key, value in covariates.items():
        features.append({"name": key, "values": value, "feature_type": "continuous"})
    contrasts = [{"name": "Mean", "values": [1] + [0] * len(covariates)}]

    DC = DesignConfig(features=features, contrasts=contrasts)
    design = DC.create_design()

    perm = MaxStatPermutation(
        design,
        contrast_indx=0,
        n_perm=n_perm,
        n_jobs=n_jobs,
    )
    perm.fit(data)
    pvalues = perm.get_pvalues(metric=metric)

    return pvalues




[docs]
def group_diff_max_stat_perm(
    data: np.ndarray,
    assignments: np.ndarray,
    n_perm: int,
    covariates: Optional[Dict[str, np.ndarray]] = None,
    metric: str = "tstats",
    n_jobs: int = 1,
) -> Tuple[np.ndarray, np.ndarray]:
    r"""Statistical significance testing for difference between two groups.

    This function fits a General Linear Model (GLM) with ordinary least
    squares and performs a row-shuffle permutation to build a null distribution
    using maximum statistic across the target dimensions (target_dims)
    for the difference between two groups.

    Parameters
    ----------
    data : np.ndarray
        Data array for baseline corrected evoked responses.
        Shape is (n_samples, \*target_dims).
    assignments : np.ndarray
        Group assignments. Shape is (n_samples,).
        Must have exactly two unique values.
    n_perm : int
        Number of permutations.
    covariates : dict, optional
        Dictionary of continuous covariates to include as confound regressors.
        Each key is a covariate name and each value is a 1D array of length
        n_samples. These are z-scored internally. Example::

            covariates = {
                "age": np.array([25, 30, 28, ...]),
                "head_size": np.array([55.1, 57.3, 54.8, ...]),
            }

    metric : str, optional
        Statistic to compute p-values with.
        Options are :code:`'copes'` or :code:`'tstats'`.
    n_jobs : int, optional
        Number of jobs to run in parallel.

    Returns
    -------
    group_diff : np.ndarray
        Difference between two groups. Shape is (\*target_dims,).
    pvalues : np.ndarray
        P-values. Shape is (\*target_dims,).
    """
    if covariates is None:
        covariates = {}

    unique_groups = np.unique(assignments)
    if len(unique_groups) != 2:
        raise ValueError("assignments must have exactly two unique values.")

    features = [
        {
            "name": "Group1",
            "values": (assignments == unique_groups[0]).astype(int),
            "feature_type": "categorical",
        },
        {
            "name": "Group2",
            "values": (assignments == unique_groups[1]).astype(int),
            "feature_type": "categorical",
        },
    ]
    for key, value in covariates.items():
        features.append({"name": key, "values": value, "feature_type": "continuous"})

    contrasts = [{"name": "GroupDiff", "values": [1, -1] + [0] * len(covariates)}]

    DC = DesignConfig(features=features, contrasts=contrasts)
    design = DC.create_design()

    perm = MaxStatPermutation(
        design,
        contrast_indx=0,
        n_perm=n_perm,
        n_jobs=n_jobs,
    )
    perm.fit(data)
    group_diff = perm.copes
    pvalues = perm.get_pvalues(metric=metric)

    return group_diff, pvalues




[docs]
def paired_diff_max_stat_perm(
    data: np.ndarray,
    n_perm: int,
    metric: str = "copes",
    n_jobs: int = 1,
) -> Tuple[np.ndarray, np.ndarray]:
    r"""Statistical significance testing for paired difference.

    This function fits a General Linear Model (GLM) with ordinary least
    squares and performs sign-flip permutations to build a null distribution
    with the maximum statistic across the target dimensions for the paired
    differences.

    Parameters
    ----------
    data : np.ndarray
        Data array for baseline corrected evoked responses.
        Shape is (n_samples, \*target_dims).
    n_perm : int
        Number of permutations.
    metric : str, optional
        Statistic to compute p-values with.
        Options are :code:`'copes'` or :code:`'tstats'`.
    n_jobs : int, optional
        Number of jobs to run in parallel.

    Returns
    -------
    paired_diff : np.ndarray
        Paired differences. Shape is (\*target_dims,).
    pvalues : np.ndarray
        P-values. Shape is (\*target_dims,).
    """
    features = [
        {"name": "Mean", "values": np.ones(data.shape[0]), "feature_type": "constant"}
    ]
    contrasts = [{"name": "Mean", "values": [1]}]

    DC = DesignConfig(features=features, contrasts=contrasts)
    design = DC.create_design()

    perm = MaxStatPermutation(
        design,
        contrast_indx=0,
        n_perm=n_perm,
        n_jobs=n_jobs,
    )
    perm.fit(data)
    pvalues = perm.get_pvalues(metric=metric)
    paired_diff = perm.copes

    return paired_diff, pvalues