Source code for pyvallocation.bayesian

from __future__ import annotations

import warnings
from dataclasses import dataclass
from typing import Optional, Union

import numpy as np
import numpy.typing as npt
from scipy import linalg as sla
from scipy.stats import chi2
import pandas as pd


def _cholesky_pd(mat: npt.NDArray[np.floating], jitter: float = 1e-12) -> npt.NDArray[np.floating]:
    r"""Computes a Cholesky decomposition that is robust to numerical errors.

    The theoretical framework for robust Bayesian allocation often requires
    operations like spectral or Cholesky decompositions of covariance matrices,
    which must be positive-definite (PD). However, matrices estimated
    from real-world data may fail to be perfectly PD due to floating-point
    inaccuracies or estimation errors.

    This function attempts to compute the Cholesky decomposition of `mat`. If
    `mat` is not PD, it adds a small multiple of the identity matrix
    (`jitter` * I) to the matrix to make it PD and then retries. This is a
    standard numerical stabilization technique.

    Args:
        mat: The square matrix (N x N) for which to compute the Cholesky
            decomposition.
        jitter: A small positive constant to add to the diagonal elements if
            the matrix is not positive-definite. Defaults to 1e-12.

    Returns:
        The lower Cholesky factor of `mat` (or `mat` + `jitter` * I).

    Raises:
        ValueError: If `mat` is not a square matrix.
        RuntimeError: If the matrix is not positive-definite even after
            adding jitter.
    """
    if mat.ndim != 2 or mat.shape[0] != mat.shape[1]:
        raise ValueError("Input to _cholesky_pd must be a square matrix.")
    try:
        return sla.cholesky(mat, lower=True, check_finite=False)
    except sla.LinAlgError:
        mat_jittered = mat + jitter * np.eye(mat.shape[0])
        warnings.warn(
            "Matrix not positive-definite; added jitter for Cholesky decomposition.",
            RuntimeWarning
        )
        try:
            return sla.cholesky(mat_jittered, lower=True, check_finite=False)
        except sla.LinAlgError as exc:
            raise RuntimeError("Matrix not positive-definite after adding jitter.") from exc


[docs] def chi2_quantile(p: float, dof: int, sqrt: bool = False) -> float: r"""Computes the quantile of the chi-square (χ²) distribution. This function is used to determine the size of the uncertainty ellipsoids for the market parameters (mean and covariance). The size is determined by a radius factor, :math:`q`, which is set according to a quantile of the chi-square distribution. For the mean vector :math:`\mu`, under the assumption that its posterior distribution is normal, the squared Mahalanobis distance is chi-square distributed. The radius factor squared, :math:`q_\mu^2`, is set using a quantile of the :math:`\chi^2_N` distribution. .. math:: q_\mu^2 = Q_{\chi_N^2}(p_\mu) For the covariance matrix :math:`\Sigma`, a similar approach is used based on a heuristic argument that the Mahalanobis distance behaves like a :math:`\chi^2` distribution with :math:`N(N+1)/2` degrees of freedom . .. math:: q_\Sigma^2 = Q_{\chi_{N(N+1)/2}^2}(p_\Sigma) Args: p: The probability level (0 < p < 1) for the quantile. dof: The degrees of freedom for the chi-square distribution. sqrt: If True, returns the square root of the quantile. Defaults to False. Returns: The chi-square quantile :math:`Q_{\chi²}(p)` or :math:`\sqrt{Q_{\chi²}(p)}` if `sqrt` is True. Raises: ValueError: If `p` is not strictly between 0 and 1. """ if not (0.0 < p < 1.0): raise ValueError("Probability `p` must be strictly between 0 and 1.") q_val = chi2.ppf(p, dof) return float(np.sqrt(q_val)) if sqrt else float(q_val)
[docs] @dataclass() class NIWParams: r"""A container for the parameters of a Normal-Inverse-Wishart (NIW) posterior distribution. :no-inheritance: These parameters are the result of a Bayesian update, combining an NIW prior with market data, as detailed in Meucci (2005). The formulas for these posterior parameters are given in Eqs. (11)-(14). """ T1: int #: The posterior pseudo-observations for the mean (:math:`T_1`), #: representing the updated confidence in the mean estimate. mu1: Union[npt.NDArray[np.floating], "pd.Series[np.floating]"] #: The posterior mean vector (:math:`\mu_1`), which is the updated #: estimate of the expected returns. nu1: int #: The posterior degrees of freedom for the covariance #: (:math:`\nu_1`), representing the updated confidence in the #: covariance estimate. sigma1: Union[npt.NDArray[np.floating], "pd.DataFrame[np.floating]"]
#: The posterior scale matrix for the covariance (:math:`\Sigma_1`), #: which is the updated scale matrix of the Inverse-Wishart #: distribution.
[docs] class NIWPosterior: r"""Computes and manages Normal-Inverse-Wishart (NIW) posterior parameters. This class implements the Bayesian update rules for an NIW distribution, which is the conjugate prior for a multivariate normal likelihood with unknown mean and covariance. The methodology follows Section 3 of Meucci (2005). It provides methods to calculate posterior parameters, classical-equivalent estimators, and factors used in robust Bayesian asset allocation. The model assumes that asset returns are independently and identically distributed according to a normal distribution. The investor's prior knowledge is modeled as an NIW distribution. How to Use: 1. Initialize the :class:`NIWPosterior` object with prior parameters: * ``prior_mu`` (:math:`\mu_0`): The prior estimate for the mean vector. * ``prior_sigma`` (:math:`\Sigma_0`): The prior scale matrix for the covariance. * ``t0`` (:math:`T_0`): The confidence in ``prior_mu``, expressed as a pseudo-count of observations. * ``nu0`` (:math:`\nu_0`): The confidence in ``prior_sigma``, expressed as a pseudo-count of observations. 2. Call the :meth:`update` method with sample statistics from observed data: * ``sample_mu`` (:math:`\hat{\mu}`): The mean vector from the data. * ``sample_sigma`` (:math:`\hat{\Sigma}`): The covariance matrix from the data. * ``n_obs`` (:math:`T`): The number of observations in the data sample. 3. The :meth:`update` method returns an :class:`NIWParams` object with the posterior parameters (:math:`T_1, \mu_1, \nu_1, \Sigma_1`), which are a blend of the prior and the market data. 4. Use accessor methods like :meth:`get_mu_ce`, :meth:`get_S_mu`, etc., to retrieve various quantities derived from the posterior distribution. Attributes: prior_mu: The prior mean vector (:math:`\mu_0`). prior_sigma: The prior scale matrix (:math:`\Sigma_0`). t0: The prior pseudo-count for the mean (:math:`T_0`). nu0: The prior pseudo-count for the covariance (:math:`\nu_0`). N: The number of assets. _asset_index: Stores pandas.Index if pandas objects are used. _posterior: Stores the computed posterior parameters. """ def __init__( self, prior_mu: Union[npt.NDArray[np.floating], "pd.Series[np.floating]"], prior_sigma: Union[npt.NDArray[np.floating], "pd.DataFrame[np.floating]"], t0: int, nu0: int, ) -> None: r"""Initializes the NIWPosterior object with prior parameters. The prior parameters :math:`(\mu_0, \Sigma_0)` represent the investor's experience, while :math:`(T_0, \nu_0)` represent their confidence in that experience. Args: prior_mu: 1D array (length N) of prior means (:math:`\mu_0`). prior_sigma: 2D array (N x N) of the prior scale matrix (:math:`\Sigma_0`). t0: Prior pseudo-count for the mean (:math:`T_0`). Must be > 0. nu0: Prior pseudo-count for the covariance (:math:`\nu_0`). Must be >= 0. Raises: ValueError: If input parameters have inconsistent shapes or invalid values. """ # Detect pandas inputs self._pandas = False self._asset_index: Optional[pd.Index] = None if isinstance(prior_mu, pd.Series): self._pandas = True self._asset_index = prior_mu.index.copy() self.prior_mu: npt.NDArray[np.floating] = prior_mu.values.astype(float) else: self.prior_mu = np.asarray(prior_mu, dtype=float) if self.prior_mu.ndim != 1: raise ValueError("`prior_mu` must be a 1D array or pandas.Series.") self.N: int = self.prior_mu.size if self.N == 0: raise ValueError("`prior_mu` cannot be empty; N must be > 0.") if isinstance(prior_sigma, pd.DataFrame): if self._asset_index is None: # If prior_mu was not pandas but prior_sigma is, infer index self._pandas = True self._asset_index = prior_sigma.index.copy() else: # Ensure indices match if not prior_sigma.index.equals(self._asset_index) or not prior_sigma.columns.equals(self._asset_index): raise ValueError("Index/columns of prior_sigma must match index of prior_mu.") self.prior_sigma: npt.NDArray[np.floating] = prior_sigma.values.astype(float) else: self.prior_sigma = np.asarray(prior_sigma, dtype=float) if self.prior_sigma.ndim != 2 or self.prior_sigma.shape != (self.N, self.N): raise ValueError(f"`prior_sigma` must be a square matrix of shape ({self.N}, {self.N}), or a pandas.DataFrame with matching index/columns.") if t0 <= 0: raise ValueError("`t0` (prior pseudo-count for mean) must be strictly positive.") if nu0 < 0: raise ValueError("`nu0` (prior pseudo-count for covariance) must be non-negative.") _ = _cholesky_pd(self.prior_sigma) # Ensure prior_sigma is PD or near-PD self.t0: int = int(t0) self.nu0: int = int(nu0) self._posterior: Optional[NIWParams] = None
[docs] def update( self, sample_mu: Union[npt.NDArray[np.floating], "pd.Series[np.floating]"], sample_sigma: Union[npt.NDArray[np.floating], "pd.DataFrame[np.floating]"], n_obs: int, ) -> NIWParams: r"""Updates the posterior parameters using sample statistics. This method implements the Bayesian update rules for the NIW parameters as given by Eqs. (11)–(14) in Meucci (2005). .. math:: \begin{align*} T_1 &= T_0 + T \\ \mu_1 &= \frac{T_0\mu_0 + T\hat{\mu}}{T_1} \\ \nu_1 &= \nu_0 + T \\ \Sigma_1 &= \frac{1}{\nu_1} \left[ \nu_0\Sigma_0 + T\hat{\Sigma} + \frac{(\mu_0 - \hat{\mu})(\mu_0 - \hat{\mu})'}{\frac{1}{T} + \frac{1}{T_0}} \right] \end{align*} The resulting posterior parameters blend the investor's prior with information from the market, with the balance determined by the relative confidence levels (:math:`T_0, \nu_0`) versus the amount of data (:math:`T`). Args: sample_mu: 1D array (length N) of sample means (:math:`\hat{\mu}`). sample_sigma: 2D array (N x N) of the sample covariance matrix (:math:`\hat{\Sigma}`). n_obs: The number of observations in the sample (:math:`T`). Returns: A :class:`NIWParams` instance with the updated posterior parameters. If pandas objects were used in initialization, returns :math:`\mu_1` as a Series and :math:`\Sigma_1` as a DataFrame. Raises: ValueError: If sample statistics have inconsistent shapes or ``n_obs`` is not a positive integer. """ # Convert pandas inputs if present if isinstance(sample_mu, pd.Series): smu = sample_mu.values.astype(float) else: smu = np.asarray(sample_mu, dtype=float) if smu.ndim != 1 or smu.shape[0] != self.N: raise ValueError(f"`sample_mu` must be a 1D array or pandas.Series of length {self.N}.") if n_obs <= 0: raise ValueError("`n_obs` (number of observations) must be strictly positive.") if isinstance(sample_sigma, pd.DataFrame): ssigma = sample_sigma.values.astype(float) else: ssigma = np.asarray(sample_sigma, dtype=float) if ssigma.ndim != 2 or ssigma.shape != (self.N, self.N): raise ValueError(f"`sample_sigma` must be a square matrix of shape ({self.N}, {self.N}), or pandas.DataFrame with matching index/columns.") _ = _cholesky_pd(ssigma) # Ensure sample_sigma is PD or near-PD # Compute posterior scalars T1 = self.t0 + n_obs nu1 = self.nu0 + n_obs mu1_array = (self.t0 * self.prior_mu + n_obs * smu) / T1 cross_term_weight_denominator = (1.0 / n_obs + 1.0 / self.t0) diff_mu = self.prior_mu - smu outer_prod_diff_mu = np.outer(diff_mu, diff_mu) cross_term_weighted = outer_prod_diff_mu / cross_term_weight_denominator sigma1_numerator = (self.nu0 * self.prior_sigma + n_obs * ssigma + cross_term_weighted) if nu1 <= 0: raise ValueError("Posterior degrees of freedom ν₁ must be positive.") sigma1_array = sigma1_numerator / nu1 _ = _cholesky_pd(sigma1_array) # Ensure Σ₁ is PD or near-PD # Wrap into pandas if appropriate if self._pandas and self._asset_index is not None: mu1: Union[npt.NDArray[np.floating], "pd.Series[np.floating]"] = pd.Series( mu1_array, index=self._asset_index ) sigma1: Union[npt.NDArray[np.floating], "pd.DataFrame[np.floating]"] = pd.DataFrame( sigma1_array, index=self._asset_index, columns=self._asset_index ) else: mu1 = mu1_array sigma1 = sigma1_array self._posterior = NIWParams(T1=T1, mu1=mu1, nu1=nu1, sigma1=sigma1) return self._posterior
[docs] def get_posterior(self) -> Optional[NIWParams]: r"""Retrieves the computed posterior parameters. Returns: A :class:`.NIWParams` instance containing the posterior parameters (:math:`T_1`, :math:`\mu_1`, :math:`\nu_1`, :math:`\Sigma_1`), or ``None`` if :meth:`~.NIWPosterior.update` has not been called. """ return self._posterior
[docs] def get_mu_ce(self) -> Union[npt.NDArray[np.floating], "pd.Series[np.floating]"]: r"""Computes the classical-equivalent estimator for the mean, :math:`\hat{\mu}_{ce}`. For the NIW model, this estimator is the posterior mean :math:`\mu_1`, as defined in Eq. (15). .. math:: \hat{\mu}_{ce} = \mu_1 Returns: The posterior mean vector :math:`\mu_1` as a NumPy array or pandas Series. Raises: RuntimeError: If posterior parameters have not been computed via :meth:`update`. """ if self._posterior is None: raise RuntimeError("Posterior parameters not computed. Call `update()` first.") mu1 = self._posterior.mu1 # If stored as numpy but we want to return pandas, wrap here: if self._pandas and not isinstance(mu1, pd.Series) and self._asset_index is not None: return pd.Series(mu1, index=self._asset_index) return mu1
[docs] def get_S_mu(self) -> Union[npt.NDArray[np.floating], "pd.DataFrame[np.floating]"]: r"""Computes the scatter matrix :math:`S_{\mu}` for the posterior of :math:`\mu`. This matrix describes the dispersion of the marginal posterior distribution of :math:`\mu` and is used to define the location-dispersion ellipsoid for robust optimization. It is defined in Eq. (16): .. math:: S_{\mu} = \frac{1}{T_1} \frac{\nu_1}{\nu_1 - 2} \Sigma_1 This computation requires the posterior degrees of freedom :math:`\nu_1 > 2`. Returns: The scatter matrix :math:`S_{\mu}` as a NumPy array or pandas DataFrame. Raises: RuntimeError: If posterior parameters have not been computed. ValueError: If :math:`\nu_1 \le 2`, as the scatter matrix is not defined. """ if self._posterior is None: raise RuntimeError("Posterior parameters not computed. Call `update()` first.") if self._posterior.nu1 <= 2: raise ValueError("Posterior degrees of freedom ν₁ must be greater than 2 to compute S_μ.") factor = self._posterior.nu1 / (self._posterior.T1 * (self._posterior.nu1 - 2.0)) # Ensure underlying data is array for multiplication sigma1_array = self._posterior.sigma1 if isinstance(sigma1_array, pd.DataFrame): sigma1_array = sigma1_array.values S_mu_array = factor * sigma1_array if self._pandas and self._asset_index is not None: return pd.DataFrame(S_mu_array, index=self._asset_index, columns=self._asset_index) return S_mu_array
[docs] def get_sigma_ce(self) -> Union[npt.NDArray[np.floating], "pd.DataFrame[np.floating]"]: r"""Computes the classical-equivalent estimator for the covariance, :math:`\hat{\Sigma}_{ce}`. This estimator is a shrunk version of the posterior scale matrix :math:`\Sigma_1`, as defined in Eq. (17). It serves as the center of the uncertainty ellipsoid for :math:`\Sigma`. .. math:: \hat{\Sigma}_{ce} = \frac{\nu_1}{\nu_1 + N + 1} \Sigma_1 Returns: The classical-equivalent estimator :math:`\hat{\Sigma}_{ce}` as a NumPy array or pandas DataFrame. Raises: RuntimeError: If posterior parameters have not been computed. ValueError: If :math:`\nu_1 + N + 1 = 0`, which is highly unlikely with valid inputs. """ if self._posterior is None: raise RuntimeError("Posterior parameters not computed. Call `update()` first.") denom = self._posterior.nu1 + self.N + 1.0 if denom == 0: raise ValueError("Denominator (ν₁ + N + 1) for Σ_ce is zero.") factor = self._posterior.nu1 / denom sigma1_array = self._posterior.sigma1 if isinstance(sigma1_array, pd.DataFrame): sigma1_array = sigma1_array.values sigma_ce_array = factor * sigma1_array if self._pandas and self._asset_index is not None: return pd.DataFrame(sigma_ce_array, index=self._asset_index, columns=self._asset_index) return sigma_ce_array
[docs] def cred_radius_mu(self, p_mu: float) -> float: r"""Computes the credibility factor :math:`\gamma_\mu` for the mean's uncertainty. This factor, :math:`\gamma_\mu`, appears in the simplified robust mean-variance optimization problem (Eq. 19). It scales the portfolio's posterior standard deviation to penalize for estimation risk in the mean vector. Its formula is given by Eq. (20): .. math:: \gamma_\mu = \sqrt{ \frac{q_\mu^2}{T_1} \frac{\nu_1}{\nu_1 - 2} } where :math:`q_\mu^2 = Q_{\chi^2_N}(p_{mu})` is the squared radius factor from the chi-square distribution. Args: p_mu: The confidence level for :math:`\mu` (0 < p_mu < 1), which reflects aversion to estimation risk. Returns: The credibility factor :math:`\gamma_\mu`. Raises: RuntimeError: If posterior parameters have not been computed. ValueError: If :math:`\nu_1 \le 2` or `p_mu` is not in (0,1). """ if self._posterior is None: raise RuntimeError("Posterior parameters not computed. Call `update()` first.") if self._posterior.nu1 <= 2: raise ValueError("Posterior ν₁ must be greater than 2 for γ_μ calculation.") if not (0.0 < p_mu < 1.0): raise ValueError("Confidence level p_mu must be between 0 and 1 (exclusive).") q_mu_squared = chi2_quantile(p_mu, self.N, sqrt=False) term_T1 = self._posterior.T1 term_nu1 = self._posterior.nu1 gamma_mu = np.sqrt((q_mu_squared / term_T1) * (term_nu1 / (term_nu1 - 2.0))) return gamma_mu
[docs] def cred_radius_sigma_factor(self, p_sigma: float) -> float: r"""Computes the scaling factor for the worst-case portfolio variance. In the robust framework, the maximum possible variance of a portfolio within the uncertainty ellipsoid for :math:`\Sigma` is not simply :math:`w'\Sigma_1 w`, but a scaled version of it. This method computes that scaling factor, which we can call :math:`C_\Sigma`. The derivation is shown in Appendix 7.2, and the final result is presented in the maximization step in Eq. (47): .. math:: \max_{\Sigma \in \Theta_\Sigma} w'\Sigma w = \underbrace{ \left[ \frac{\nu_1}{\nu_1 + N + 1} + \sqrt{\frac{2\nu_1^2 q_\Sigma^2}{(\nu_1 + N + 1)^3}} \right] }_{C_\Sigma} (w'\Sigma_1 w) where :math:`q_\Sigma^2 = Q_{\chi^2_{dof}}(p_\Sigma)` with :math:`dof = N(N+1)/2`. Args: p_sigma: The confidence level for :math:`\Sigma` (0 < p_sigma < 1), reflecting aversion to estimation risk. Returns: The credibility factor :math:`C_\Sigma` for scaling the portfolio variance. Raises: RuntimeError: If posterior parameters have not been computed. ValueError: If `p_sigma` is not in (0,1) or internal terms are invalid. """ if self._posterior is None: raise RuntimeError("Posterior parameters not computed. Call `update()` first.") if not (0.0 < p_sigma < 1.0): raise ValueError("Confidence level p_sigma must be between 0 and 1 (exclusive).") nu1 = self._posterior.nu1 denom_cubed_base = nu1 + self.N + 1.0 if denom_cubed_base <= 0: raise ValueError("Term (ν₁ + N + 1) must be positive for C_Σ calculation.") dof = self.N * (self.N + 1) // 2 q_sigma_squared_val = chi2_quantile(p_sigma, dof, sqrt=False) term1 = nu1 / denom_cubed_base numerator_term2 = 2.0 * nu1**2 * q_sigma_squared_val denominator_term2 = denom_cubed_base**3 if denominator_term2 == 0: raise ValueError("Denominator for sqrt term in C_Σ is zero.") term2_arg = numerator_term2 / denominator_term2 if term2_arg < 0: # This should not happen with valid inputs but good to check warnings.warn( f"Argument for sqrt in C_Σ calculation is negative ({term2_arg:f}); result may be NaN.", RuntimeWarning ) term2 = np.sqrt(term2_arg) C_sigma = term1 + term2 return C_sigma