Source code for pyvallocation.bayesian

from __future__ import annotations

import warnings
from dataclasses import dataclass
from typing import Optional, Union

import numpy as np
import numpy.typing as npt
from scipy import linalg as sla
from scipy.stats import chi2
import pandas as pd


def _cholesky_pd(mat: npt.NDArray[np.floating], jitter: float = 1e-12) -> npt.NDArray[np.floating]:
    r"""Computes a Cholesky decomposition that is robust to numerical errors.

    The theoretical framework for robust Bayesian allocation often requires
    operations like spectral or Cholesky decompositions of covariance matrices,
    which must be positive-definite (PD). However, matrices estimated
    from real-world data may fail to be perfectly PD due to floating-point
    inaccuracies or estimation errors.

    This function attempts to compute the Cholesky decomposition of `mat`. If
    `mat` is not PD, it adds a small multiple of the identity matrix
    (`jitter` * I) to the matrix to make it PD and then retries. This is a
    standard numerical stabilization technique.

    Args:
        mat: The square matrix (N x N) for which to compute the Cholesky
            decomposition.
        jitter: A small positive constant to add to the diagonal elements if
            the matrix is not positive-definite. Defaults to 1e-12.

    Returns:
        The lower Cholesky factor of `mat` (or `mat` + `jitter` * I).

    Raises:
        ValueError: If `mat` is not a square matrix.
        RuntimeError: If the matrix is not positive-definite even after
            adding jitter.
    """
    if mat.ndim != 2 or mat.shape[0] != mat.shape[1]:
        raise ValueError("Input to _cholesky_pd must be a square matrix.")
    try:
        return sla.cholesky(mat, lower=True, check_finite=False)
    except sla.LinAlgError:
        mat_jittered = mat + jitter * np.eye(mat.shape[0])
        warnings.warn(
            "Matrix not positive-definite; added jitter for Cholesky decomposition.",
            RuntimeWarning
        )
        try:
            return sla.cholesky(mat_jittered, lower=True, check_finite=False)
        except sla.LinAlgError as exc:
            raise RuntimeError("Matrix not positive-definite after adding jitter.") from exc



[docs]
def chi2_quantile(p: float, dof: int, sqrt: bool = False) -> float:
    r"""Computes the quantile of the chi-square (χ²) distribution.

    This function is used to determine the size of the uncertainty ellipsoids
    for the market parameters (mean and covariance). The size is
    determined by a radius factor, :math:`q`, which is set according to a
    quantile of the chi-square distribution.

    For the mean vector :math:`\mu`, under the assumption that its posterior
    distribution is normal, the squared Mahalanobis distance is chi-square
    distributed. The radius factor squared, :math:`q_\mu^2`, is set using a
    quantile of the :math:`\chi^2_N` distribution.

    .. math::
        q_\mu^2 = Q_{\chi_N^2}(p_\mu)

    For the covariance matrix :math:`\Sigma`, a similar approach is used based on a
    heuristic argument that the Mahalanobis distance behaves like a
    :math:`\chi^2` distribution with :math:`N(N+1)/2` degrees of freedom
    .

    .. math::
        q_\Sigma^2 = Q_{\chi_{N(N+1)/2}^2}(p_\Sigma)

    Args:
        p: The probability level (0 < p < 1) for the quantile.
        dof: The degrees of freedom for the chi-square distribution.
        sqrt: If True, returns the square root of the quantile. Defaults to False.

    Returns:
        The chi-square quantile :math:`Q_{\chi²}(p)` or :math:`\sqrt{Q_{\chi²}(p)}`
        if `sqrt` is True.

    Raises:
        ValueError: If `p` is not strictly between 0 and 1.
    """
    if not (0.0 < p < 1.0):
        raise ValueError("Probability `p` must be strictly between 0 and 1.")
    q_val = chi2.ppf(p, dof)
    return float(np.sqrt(q_val)) if sqrt else float(q_val)




[docs]
@dataclass()
class NIWParams:
    r"""A container for the parameters of a Normal-Inverse-Wishart (NIW) posterior distribution. :no-inheritance:

    These parameters are the result of a Bayesian update, combining an
    NIW prior with market data, as detailed in Meucci (2005). The
    formulas for these posterior parameters are given in Eqs. (11)-(14).
    """
    T1: int
    #: The posterior pseudo-observations for the mean (:math:`T_1`),
    #: representing the updated confidence in the mean estimate.
    
    mu1: Union[npt.NDArray[np.floating], "pd.Series[np.floating]"]
    #: The posterior mean vector (:math:`\mu_1`), which is the updated
    #: estimate of the expected returns.
    
    nu1: int
    #: The posterior degrees of freedom for the covariance
    #: (:math:`\nu_1`), representing the updated confidence in the
    #: covariance estimate.
    
    sigma1: Union[npt.NDArray[np.floating], "pd.DataFrame[np.floating]"]

    #: The posterior scale matrix for the covariance (:math:`\Sigma_1`),
    #: which is the updated scale matrix of the Inverse-Wishart
    #: distribution.


[docs]
class NIWPosterior:
    r"""Computes and manages Normal-Inverse-Wishart (NIW) posterior parameters.

    This class implements the Bayesian update rules for an NIW distribution,
    which is the conjugate prior for a multivariate normal likelihood with
    unknown mean and covariance. The methodology follows Section 3
    of Meucci (2005). It provides methods to calculate posterior
    parameters, classical-equivalent estimators, and factors used in robust
    Bayesian asset allocation.

    The model assumes that asset returns are independently and identically
    distributed according to a normal distribution. The investor's
    prior knowledge is modeled as an NIW distribution.

    How to Use:

    1.  Initialize the :class:`NIWPosterior` object with prior parameters:

        * ``prior_mu`` (:math:`\mu_0`): The prior estimate for the mean vector.
        * ``prior_sigma`` (:math:`\Sigma_0`): The prior scale matrix for the covariance.
        * ``t0`` (:math:`T_0`): The confidence in ``prior_mu``, expressed as a
            pseudo-count of observations.
        * ``nu0`` (:math:`\nu_0`): The confidence in ``prior_sigma``, expressed as a
            pseudo-count of observations.

    2.  Call the :meth:`update` method with sample statistics from observed data:

        * ``sample_mu`` (:math:`\hat{\mu}`): The mean vector from the data.
        * ``sample_sigma`` (:math:`\hat{\Sigma}`): The covariance matrix from the data.
        * ``n_obs`` (:math:`T`): The number of observations in the data sample.

    3.  The :meth:`update` method returns an :class:`NIWParams` object with the
        posterior parameters (:math:`T_1, \mu_1, \nu_1, \Sigma_1`), which are
        a blend of the prior and the market data.

    4.  Use accessor methods like :meth:`get_mu_ce`, :meth:`get_S_mu`, etc., to
        retrieve various quantities derived from the posterior distribution.

    Attributes:
        prior_mu: The prior mean vector (:math:`\mu_0`).
        prior_sigma: The prior scale matrix (:math:`\Sigma_0`).
        t0: The prior pseudo-count for the mean (:math:`T_0`).
        nu0: The prior pseudo-count for the covariance (:math:`\nu_0`).
        N: The number of assets.
        _asset_index: Stores pandas.Index if pandas objects are used.
        _posterior: Stores the computed posterior parameters.
    """

    def __init__(
        self,
        prior_mu: Union[npt.NDArray[np.floating], "pd.Series[np.floating]"],
        prior_sigma: Union[npt.NDArray[np.floating], "pd.DataFrame[np.floating]"],
        t0: int,
        nu0: int,
    ) -> None:
        r"""Initializes the NIWPosterior object with prior parameters.

        The prior parameters :math:`(\mu_0, \Sigma_0)` represent the investor's
        experience, while :math:`(T_0, \nu_0)` represent their confidence in
        that experience.

        Args:
            prior_mu: 1D array (length N) of prior means (:math:`\mu_0`).
            prior_sigma: 2D array (N x N) of the prior scale matrix (:math:`\Sigma_0`).
            t0: Prior pseudo-count for the mean (:math:`T_0`). Must be > 0.
            nu0: Prior pseudo-count for the covariance (:math:`\nu_0`). Must be >= 0.

        Raises:
            ValueError: If input parameters have inconsistent shapes or
                invalid values.
        """
        # Detect pandas inputs
        self._pandas = False
        self._asset_index: Optional[pd.Index] = None

        if isinstance(prior_mu, pd.Series):
            self._pandas = True
            self._asset_index = prior_mu.index.copy()
            self.prior_mu: npt.NDArray[np.floating] = prior_mu.values.astype(float)
        else:
            self.prior_mu = np.asarray(prior_mu, dtype=float)

        if self.prior_mu.ndim != 1:
            raise ValueError("`prior_mu` must be a 1D array or pandas.Series.")
        self.N: int = self.prior_mu.size
        if self.N == 0:
            raise ValueError("`prior_mu` cannot be empty; N must be > 0.")

        if isinstance(prior_sigma, pd.DataFrame):
            if self._asset_index is None:
                # If prior_mu was not pandas but prior_sigma is, infer index
                self._pandas = True
                self._asset_index = prior_sigma.index.copy()
            else:
                # Ensure indices match
                if not prior_sigma.index.equals(self._asset_index) or not prior_sigma.columns.equals(self._asset_index):
                    raise ValueError("Index/columns of prior_sigma must match index of prior_mu.")
            self.prior_sigma: npt.NDArray[np.floating] = prior_sigma.values.astype(float)
        else:
            self.prior_sigma = np.asarray(prior_sigma, dtype=float)

        if self.prior_sigma.ndim != 2 or self.prior_sigma.shape != (self.N, self.N):
            raise ValueError(f"`prior_sigma` must be a square matrix of shape ({self.N}, {self.N}), or a pandas.DataFrame with matching index/columns.")

        if t0 <= 0:
            raise ValueError("`t0` (prior pseudo-count for mean) must be strictly positive.")
        if nu0 < 0:
            raise ValueError("`nu0` (prior pseudo-count for covariance) must be non-negative.")

        _ = _cholesky_pd(self.prior_sigma)  # Ensure prior_sigma is PD or near-PD

        self.t0: int = int(t0)
        self.nu0: int = int(nu0)
        self._posterior: Optional[NIWParams] = None


[docs]
    def update(
        self,
        sample_mu: Union[npt.NDArray[np.floating], "pd.Series[np.floating]"],
        sample_sigma: Union[npt.NDArray[np.floating], "pd.DataFrame[np.floating]"],
        n_obs: int,
    ) -> NIWParams:
        r"""Updates the posterior parameters using sample statistics.

        This method implements the Bayesian update rules for the NIW parameters
        as given by Eqs. (11)–(14) in Meucci (2005).

        .. math::
            \begin{align*}
            T_1 &= T_0 + T \\
            \mu_1 &= \frac{T_0\mu_0 + T\hat{\mu}}{T_1} \\
            \nu_1 &= \nu_0 + T \\
            \Sigma_1 &= \frac{1}{\nu_1} \left[ \nu_0\Sigma_0 + T\hat{\Sigma} + \frac{(\mu_0 - \hat{\mu})(\mu_0 - \hat{\mu})'}{\frac{1}{T} + \frac{1}{T_0}} \right]
            \end{align*}

        The resulting posterior parameters blend the investor's prior with
        information from the market, with the balance determined by the
        relative confidence levels (:math:`T_0, \nu_0`) versus the amount of
        data (:math:`T`).

        Args:
            sample_mu: 1D array (length N) of sample means (:math:`\hat{\mu}`).
            sample_sigma: 2D array (N x N) of the sample covariance matrix
                (:math:`\hat{\Sigma}`).
            n_obs: The number of observations in the sample (:math:`T`).

        Returns:
            A :class:`NIWParams` instance with the updated posterior parameters.
            If pandas objects were used in initialization, returns :math:`\mu_1`
            as a Series and :math:`\Sigma_1` as a DataFrame.

        Raises:
            ValueError: If sample statistics have inconsistent shapes or ``n_obs``
                is not a positive integer.
        """
        # Convert pandas inputs if present
        if isinstance(sample_mu, pd.Series):
            smu = sample_mu.values.astype(float)
        else:
            smu = np.asarray(sample_mu, dtype=float)

        if smu.ndim != 1 or smu.shape[0] != self.N:
            raise ValueError(f"`sample_mu` must be a 1D array or pandas.Series of length {self.N}.")

        if n_obs <= 0:
            raise ValueError("`n_obs` (number of observations) must be strictly positive.")

        if isinstance(sample_sigma, pd.DataFrame):
            ssigma = sample_sigma.values.astype(float)
        else:
            ssigma = np.asarray(sample_sigma, dtype=float)

        if ssigma.ndim != 2 or ssigma.shape != (self.N, self.N):
            raise ValueError(f"`sample_sigma` must be a square matrix of shape ({self.N}, {self.N}), or pandas.DataFrame with matching index/columns.")

        _ = _cholesky_pd(ssigma)  # Ensure sample_sigma is PD or near-PD

        # Compute posterior scalars
        T1 = self.t0 + n_obs
        nu1 = self.nu0 + n_obs
        mu1_array = (self.t0 * self.prior_mu + n_obs * smu) / T1

        cross_term_weight_denominator = (1.0 / n_obs + 1.0 / self.t0)
        diff_mu = self.prior_mu - smu
        outer_prod_diff_mu = np.outer(diff_mu, diff_mu)
        cross_term_weighted = outer_prod_diff_mu / cross_term_weight_denominator

        sigma1_numerator = (self.nu0 * self.prior_sigma
                            + n_obs * ssigma
                            + cross_term_weighted)
        if nu1 <= 0:
            raise ValueError("Posterior degrees of freedom ν₁ must be positive.")
        sigma1_array = sigma1_numerator / nu1

        _ = _cholesky_pd(sigma1_array)  # Ensure Σ₁ is PD or near-PD

        # Wrap into pandas if appropriate
        if self._pandas and self._asset_index is not None:
            mu1: Union[npt.NDArray[np.floating], "pd.Series[np.floating]"] = pd.Series(
                mu1_array, index=self._asset_index
            )
            sigma1: Union[npt.NDArray[np.floating], "pd.DataFrame[np.floating]"] = pd.DataFrame(
                sigma1_array, index=self._asset_index, columns=self._asset_index
            )
        else:
            mu1 = mu1_array
            sigma1 = sigma1_array

        self._posterior = NIWParams(T1=T1, mu1=mu1, nu1=nu1, sigma1=sigma1)
        return self._posterior



[docs]
    def get_posterior(self) -> Optional[NIWParams]:
        r"""Retrieves the computed posterior parameters.

        Returns:
            A :class:`.NIWParams` instance containing the posterior parameters
            (:math:`T_1`, :math:`\mu_1`, :math:`\nu_1`, :math:`\Sigma_1`), or ``None``
            if :meth:`~.NIWPosterior.update` has not been called.

        """
        return self._posterior



[docs]
    def get_mu_ce(self) -> Union[npt.NDArray[np.floating], "pd.Series[np.floating]"]:
        r"""Computes the classical-equivalent estimator for the mean, :math:`\hat{\mu}_{ce}`.

        For the NIW model, this estimator is the posterior mean :math:`\mu_1`,
        as defined in Eq. (15).

        .. math::
            \hat{\mu}_{ce} = \mu_1

        Returns:
            The posterior mean vector :math:`\mu_1` as a NumPy array or pandas
            Series.

        Raises:
            RuntimeError: If posterior parameters have not been computed via
                :meth:`update`.
        """
        if self._posterior is None:
            raise RuntimeError("Posterior parameters not computed. Call `update()` first.")
        mu1 = self._posterior.mu1
        # If stored as numpy but we want to return pandas, wrap here:
        if self._pandas and not isinstance(mu1, pd.Series) and self._asset_index is not None:
            return pd.Series(mu1, index=self._asset_index)
        return mu1



[docs]
    def get_S_mu(self) -> Union[npt.NDArray[np.floating], "pd.DataFrame[np.floating]"]:
        r"""Computes the scatter matrix :math:`S_{\mu}` for the posterior of :math:`\mu`.

        This matrix describes the dispersion of the marginal posterior
        distribution of :math:`\mu` and is used to define the location-dispersion
        ellipsoid for robust optimization. It is defined in Eq. (16):

        .. math::
            S_{\mu} = \frac{1}{T_1} \frac{\nu_1}{\nu_1 - 2} \Sigma_1

        This computation requires the posterior degrees of freedom :math:`\nu_1 > 2`.

        Returns:
            The scatter matrix :math:`S_{\mu}` as a NumPy array or pandas DataFrame.

        Raises:
            RuntimeError: If posterior parameters have not been computed.
            ValueError: If :math:`\nu_1 \le 2`, as the scatter matrix is not
                defined.
        """
        if self._posterior is None:
            raise RuntimeError("Posterior parameters not computed. Call `update()` first.")
        if self._posterior.nu1 <= 2:
            raise ValueError("Posterior degrees of freedom ν₁ must be greater than 2 to compute S_μ.")
        factor = self._posterior.nu1 / (self._posterior.T1 * (self._posterior.nu1 - 2.0))
        # Ensure underlying data is array for multiplication
        sigma1_array = self._posterior.sigma1
        if isinstance(sigma1_array, pd.DataFrame):
            sigma1_array = sigma1_array.values

        S_mu_array = factor * sigma1_array

        if self._pandas and self._asset_index is not None:
            return pd.DataFrame(S_mu_array, index=self._asset_index, columns=self._asset_index)
        return S_mu_array




[docs]
    def get_sigma_ce(self) -> Union[npt.NDArray[np.floating], "pd.DataFrame[np.floating]"]:
        r"""Computes the classical-equivalent estimator for the covariance, :math:`\hat{\Sigma}_{ce}`.

        This estimator is a shrunk version of the posterior scale matrix
        :math:`\Sigma_1`, as defined in Eq. (17). It serves as the center of
        the uncertainty ellipsoid for :math:`\Sigma`.

        .. math::
            \hat{\Sigma}_{ce} = \frac{\nu_1}{\nu_1 + N + 1} \Sigma_1

        Returns:
            The classical-equivalent estimator :math:`\hat{\Sigma}_{ce}` as a NumPy
            array or pandas DataFrame.

        Raises:
            RuntimeError: If posterior parameters have not been computed.
            ValueError: If :math:`\nu_1 + N + 1 = 0`, which is highly unlikely
                with valid inputs.
        """
        if self._posterior is None:
            raise RuntimeError("Posterior parameters not computed. Call `update()` first.")
        denom = self._posterior.nu1 + self.N + 1.0
        if denom == 0:
            raise ValueError("Denominator (ν₁ + N + 1) for Σ_ce is zero.")
        factor = self._posterior.nu1 / denom

        sigma1_array = self._posterior.sigma1
        if isinstance(sigma1_array, pd.DataFrame):
            sigma1_array = sigma1_array.values

        sigma_ce_array = factor * sigma1_array
        if self._pandas and self._asset_index is not None:
            return pd.DataFrame(sigma_ce_array, index=self._asset_index, columns=self._asset_index)
        return sigma_ce_array



[docs]
    def cred_radius_mu(self, p_mu: float) -> float:
        r"""Computes the credibility factor :math:`\gamma_\mu` for the mean's uncertainty.

        This factor, :math:`\gamma_\mu`, appears in the simplified robust
        mean-variance optimization problem (Eq. 19). It scales the
        portfolio's posterior standard deviation to penalize for estimation
        risk in the mean vector. Its formula is given by Eq. (20):

        .. math::
            \gamma_\mu = \sqrt{ \frac{q_\mu^2}{T_1} \frac{\nu_1}{\nu_1 - 2} }

        where :math:`q_\mu^2 = Q_{\chi^2_N}(p_{mu})` is the squared radius factor
        from the chi-square distribution.

        Args:
            p_mu: The confidence level for :math:`\mu` (0 < p_mu < 1), which
                reflects aversion to estimation risk.

        Returns:
            The credibility factor :math:`\gamma_\mu`.

        Raises:
            RuntimeError: If posterior parameters have not been computed.
            ValueError: If :math:`\nu_1 \le 2` or `p_mu` is not in (0,1).
        """
        if self._posterior is None:
            raise RuntimeError("Posterior parameters not computed. Call `update()` first.")
        if self._posterior.nu1 <= 2:
            raise ValueError("Posterior ν₁ must be greater than 2 for γ_μ calculation.")
        if not (0.0 < p_mu < 1.0):
            raise ValueError("Confidence level p_mu must be between 0 and 1 (exclusive).")
        q_mu_squared = chi2_quantile(p_mu, self.N, sqrt=False)
        term_T1 = self._posterior.T1
        term_nu1 = self._posterior.nu1
        gamma_mu = np.sqrt((q_mu_squared / term_T1) * (term_nu1 / (term_nu1 - 2.0)))
        return gamma_mu



[docs]
    def cred_radius_sigma_factor(self, p_sigma: float) -> float:
        r"""Computes the scaling factor for the worst-case portfolio variance.

        In the robust framework, the maximum possible variance of a portfolio
        within the uncertainty ellipsoid for :math:`\Sigma` is not simply
        :math:`w'\Sigma_1 w`, but a scaled version of it. This method computes
        that scaling factor, which we can call :math:`C_\Sigma`. The derivation is
        shown in Appendix 7.2, and the final result is presented in the
        maximization step in Eq. (47):

        .. math::
            \max_{\Sigma \in \Theta_\Sigma} w'\Sigma w = \underbrace{ \left[ \frac{\nu_1}{\nu_1 + N + 1} + \sqrt{\frac{2\nu_1^2 q_\Sigma^2}{(\nu_1 + N + 1)^3}} \right] }_{C_\Sigma} (w'\Sigma_1 w)

        where :math:`q_\Sigma^2 = Q_{\chi^2_{dof}}(p_\Sigma)` with
        :math:`dof = N(N+1)/2`.

        Args:
            p_sigma: The confidence level for :math:`\Sigma` (0 < p_sigma < 1),
                reflecting aversion to estimation risk.

        Returns:
            The credibility factor :math:`C_\Sigma` for scaling the portfolio variance.

        Raises:
            RuntimeError: If posterior parameters have not been computed.
            ValueError: If `p_sigma` is not in (0,1) or internal terms are invalid.
        """
        if self._posterior is None:
            raise RuntimeError("Posterior parameters not computed. Call `update()` first.")
        if not (0.0 < p_sigma < 1.0):
            raise ValueError("Confidence level p_sigma must be between 0 and 1 (exclusive).")

        nu1 = self._posterior.nu1
        denom_cubed_base = nu1 + self.N + 1.0
        if denom_cubed_base <= 0:
            raise ValueError("Term (ν₁ + N + 1) must be positive for C_Σ calculation.")

        dof = self.N * (self.N + 1) // 2
        q_sigma_squared_val = chi2_quantile(p_sigma, dof, sqrt=False)

        term1 = nu1 / denom_cubed_base
        numerator_term2 = 2.0 * nu1**2 * q_sigma_squared_val
        denominator_term2 = denom_cubed_base**3
        if denominator_term2 == 0:
            raise ValueError("Denominator for sqrt term in C_Σ is zero.")
        term2_arg = numerator_term2 / denominator_term2
        if term2_arg < 0:
            # This should not happen with valid inputs but good to check
            warnings.warn(
                f"Argument for sqrt in C_Σ calculation is negative ({term2_arg:f}); result may be NaN.",
                RuntimeWarning
            )
        term2 = np.sqrt(term2_arg)
        C_sigma = term1 + term2
        return C_sigma