Source code for nuee.dissimilarity.anosim

"""
ANOSIM (Analysis of Similarities).

This module provides an implementation of ANOSIM that follows the
approach used in ``vegan::anosim``: it ranks distances, computes the
between- vs. within-group statistic, and estimates p-values by permutation.
"""

from __future__ import annotations

import numpy as np
import pandas as pd
from scipy.spatial.distance import squareform
from typing import Dict, Optional, Union


ArrayLike = Union[np.ndarray, pd.DataFrame, pd.Series]


def _as_numpy_distance(matrix: ArrayLike) -> np.ndarray:
    if isinstance(matrix, pd.DataFrame):
        matrix = matrix.values
    matrix = np.asarray(matrix, dtype=float)
    if matrix.ndim == 1:
        matrix = squareform(matrix)
    if matrix.shape[0] != matrix.shape[1]:
        raise ValueError("Distance matrix must be square or condensed.")
    if not np.allclose(matrix, matrix.T, atol=1e-12):
        raise ValueError("Distance matrix must be symmetric.")
    return matrix


def _prepare_grouping(grouping: ArrayLike, n: int) -> np.ndarray:
    if isinstance(grouping, pd.Series):
        grouping = grouping.values
    grouping = np.asarray(grouping)
    if grouping.ndim != 1:
        raise ValueError("Grouping vector must be one-dimensional.")
    if grouping.shape[0] != n:
        raise ValueError("Grouping vector length must match the distance matrix.")
    return grouping



[docs]
def anosim(distance_matrix: ArrayLike,
           grouping: ArrayLike,
           permutations: int = 999,
           random_state: Optional[Union[int, np.random.Generator]] = None,
           **kwargs) -> Dict[str, Union[float, int]]:
    """
    Analysis of similarities (ANOSIM).

    Parameters
    ----------
    distance_matrix:
        Square or condensed distance matrix.
    grouping:
        Group assignments for each observation.
    permutations:
        Number of permutations used to estimate the p-value. Set to 0 to skip.
    random_state:
        Seed controlling permutation reproducibility.

    Returns
    -------
    dict
        Dictionary with ``r_statistic``, ``p_value`` and ``permutations``.
    """
    D = _as_numpy_distance(distance_matrix)
    n = D.shape[0]
    grouping = _prepare_grouping(grouping, n)

    iu = np.triu_indices(n, k=1)
    distances = D[iu]
    ranks = distances.argsort().argsort().astype(float) + 1.0

    group_labels, inverse = np.unique(grouping, return_inverse=True)
    n_groups = len(group_labels)
    if n_groups < 2:
        raise ValueError("ANOSIM requires at least two groups.")

    between_values = []
    within_values = []
    idx = 0
    for i in range(n - 1):
        gi = inverse[i]
        for j in range(i + 1, n):
            gj = inverse[j]
            if gi == gj:
                within_values.append(ranks[idx])
            else:
                between_values.append(ranks[idx])
            idx += 1

    between_values = np.asarray(between_values, dtype=float)
    within_values = np.asarray(within_values, dtype=float)
    rb = between_values.mean() if between_values.size else 0.0
    rw = within_values.mean() if within_values.size else 0.0
    m = ranks.size
    denominator = m / 2 if m > 0 else 1.0
    R_obs = (rb - rw) / denominator

    p_value = np.nan
    if permutations and permutations > 0:
        rng = np.random.default_rng(random_state)
        exceed = 0
        for _ in range(permutations):
            permuted = rng.permutation(inverse)
            between_p = []
            within_p = []
            idx = 0
            for i in range(n - 1):
                gi = permuted[i]
                for j in range(i + 1, n):
                    gj = permuted[j]
                    if gi == gj:
                        within_p.append(ranks[idx])
                    else:
                        between_p.append(ranks[idx])
                    idx += 1
            between_p = np.asarray(between_p, dtype=float)
            within_p = np.asarray(within_p, dtype=float)
            denom = m / 2 if m > 0 else 1.0
            rb_p = between_p.mean() if between_p.size else 0.0
            rw_p = within_p.mean() if within_p.size else 0.0
            R_perm = (rb_p - rw_p) / denom
            if R_perm >= R_obs - 1e-12:
                exceed += 1
        p_value = (exceed + 1) / (permutations + 1)

    return {
        "r_statistic": float(R_obs),
        "p_value": float(p_value) if np.isfinite(p_value) else np.nan,
        "permutations": permutations,
    }
Source code for nuee.dissimilarity.anosim

nuee

Navigation

Related Topics