Source code for slsqp_jax.diagnostics.signals

"""Signal evaluators and the registry that wires them in.

A signal is the diagnostics layer's atomic unit of evidence: an
evaluator function decides at fire time whether a known suspicious
pattern is present, and (when it fires) immediately builds the
linalg artifacts it needs from the live state so the report renderer
can surface them without re-running the solver.

The plan distinguishes two flavours of evaluator:

* **Per-step evaluators** receive ``(state, summary, summaries)`` and
  are called inside the runner's iteration loop while the live
  ``SLSQPState`` is in scope.  Each evaluator splits into a cheap
  predicate (``cheap_predicate(summary) -> bool``) that runs every
  step from host scalars only, and an expensive
  ``build_artifacts(state) -> dict`` that pulls device arrays only
  when the predicate fires.
* **End-of-run evaluators** receive
  ``(final_state, summaries, coarse_result)`` and run once after the
  loop exits.  They never see a non-final ``SLSQPState``; trajectory
  inspection happens against the ``StepSummary`` list instead.

Both flavours are registered into :data:`PER_STEP_EVALUATORS` /
:data:`END_OF_RUN_EVALUATORS` (Phase 2 fills them in; Phase 1 ships
empty so the runner has the same call surface from day one).
"""

from __future__ import annotations

from collections.abc import Callable
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Literal, Optional

import jax.numpy as jnp
import numpy as np

from slsqp_jax.diagnostics.playbook import confidence_for, magnitude_for

if TYPE_CHECKING:
    from slsqp_jax.diagnostics.records import StepSummary
    from slsqp_jax.slsqp import SLSQP
    from slsqp_jax.state import SLSQPState


Specificity = Literal["specific", "ambiguous", "generic"]
Magnitude = Literal["marginal", "moderate", "extreme"]
Confidence = Literal["low", "medium", "high"]


@dataclass(frozen=True)
class EvalContext:
    """Bundle of solver-derived constants that evaluators need.

    Threaded through the runner so per-step + end-of-run evaluators
    can read the user's :attr:`rtol` / :attr:`atol` (e.g. for the
    ``multiplier_recovery_noise`` test which compares against
    ``rtol * max(mu_max, 1)`` — see
    :func:`slsqp_jax.slsqp.termination.compute_mu_max`) without
    taking a stateful dependency on the solver instance.

    Attributes:
        solver: The :class:`slsqp_jax.SLSQP` instance the run used.
            Carried because some artifacts (e.g. the initial L-BFGS
            memory size) live on the solver, not the state.
        rtol: Relative-stationarity tolerance used by ``terminate()``.
        atol: Primal-feasibility tolerance used by ``terminate()``.
        max_steps: Maximum number of SQP iterations the manual loop
            was allowed to run.
    """

    solver: "SLSQP"
    rtol: float
    atol: float
    max_steps: int


[docs] @dataclass(frozen=True) class Signal: """Single evidence-backed hypothesis emitted by an evaluator. The wording contract is *hypothesis with evidence*, not verdict. ``summary`` reads "X looks suspicious because Y"; ``detail`` expands on the mechanism; ``suggestions`` are starting points for the user to investigate, not prescriptions to apply blindly. Attributes: name: Stable machine-readable identifier (used for the playbook scope filter and the cap-policy enforcement test). specificity: How uniquely this pattern points at one cause. ``specific`` = essentially diagnostic for one cause; ``ambiguous`` = consistent with two or three named causes; ``generic`` = consistent with many causes. Static editorial tag set at registration. magnitude: How far past threshold the evidence landed. ``marginal`` = within 10x of threshold; ``moderate`` = 10x-100x past threshold; ``extreme`` = >100x past threshold. Computed from ``evidence`` at fire time. confidence: Derived from ``(specificity, magnitude)`` via the lookup table in :func:`slsqp_jax.diagnostics.playbook.confidence_for`. The single ranking axis the report sorts by. summary: One-line "X looks suspicious because Y" framing. detail: Multi-paragraph mechanism explanation. evidence: Numeric evidence keyed by short name. Each value is the actual measurement; the threshold is documented in the evaluator's docstring. suggestions: Concrete starting-point fixes (config knobs, alternative inner solvers, etc). artifacts: Heavy linalg objects the evaluator computed immediately from the live state when it fired. May be empty. offending_step: 1-indexed step the signal fired on, or ``None`` for end-of-run evaluators that scope over the whole run. """ name: str specificity: Specificity magnitude: Magnitude confidence: Confidence summary: str detail: str evidence: dict[str, float | int] = field(default_factory=dict) suggestions: list[str] = field(default_factory=list) artifacts: dict[str, np.ndarray] = field(default_factory=dict) offending_step: Optional[int] = None
# The two flavours of evaluator callable signature. Kept as bare # Callable aliases (rather than typing.Protocol) so subclassing / # duck-typing in tests is trivial. Both flavours take the # :class:`EvalContext` as the first positional arg so they can read # tolerances / solver config without a separate import. PerStepEvaluator = Callable[ [EvalContext, "SLSQPState", "StepSummary", list["StepSummary"]], Optional[Signal], ] EndOfRunEvaluator = Callable[ [EvalContext, "SLSQPState", list["StepSummary"], Any], Optional[Signal], ] @dataclass(frozen=True) class SignalRegistration: """Static registration record for a signal evaluator. The ``cap-policy`` enforcement test (``tests/diagnostics/test_registry.py``) iterates the registries of :data:`PER_STEP_EVALUATORS` / :data:`END_OF_RUN_EVALUATORS` and asserts that for every ``name`` here there exists a ``test_signal_<name>_synthetic`` and ``test_signal_<name>_integration`` in the diagnostics test module. ``specificity`` is captured here so it stays adjacent to the evaluator and the docstring (single source of truth). """ name: str specificity: Specificity evaluator: Callable flavour: Literal["per_step", "end_of_run"] # --------------------------------------------------------------------------- # Registry # --------------------------------------------------------------------------- # Phase 1 ships empty registries; Phase 2 populates them via # :func:`register_evaluator`. These are the canonical lists the # runner walks each iteration / at end-of-run. PER_STEP_EVALUATORS: tuple[PerStepEvaluator, ...] = () END_OF_RUN_EVALUATORS: tuple[EndOfRunEvaluator, ...] = () # Parallel registry of the static metadata for each evaluator. Kept # as a list (not a tuple) so :func:`register_evaluator` can append in # place without reconstructing the public per-step / end-of-run # tuples on every call. SIGNAL_REGISTRY: list[SignalRegistration] = [] def register_evaluator( name: str, *, specificity: Specificity, flavour: Literal["per_step", "end_of_run"], evaluator: Callable, ) -> None: """Append an evaluator to the appropriate registry. Phase 2 calls this at module import time for each starter signal. The function is idempotent on ``name`` (re-registering the same name overwrites the previous record), which simplifies tests that exercise alternative thresholds. """ global PER_STEP_EVALUATORS, END_OF_RUN_EVALUATORS # Replace any prior registration with the same name. for i, reg in enumerate(SIGNAL_REGISTRY): if reg.name == name: # pragma: no cover SIGNAL_REGISTRY.pop(i) break SIGNAL_REGISTRY.append( SignalRegistration( name=name, specificity=specificity, evaluator=evaluator, flavour=flavour, ) ) PER_STEP_EVALUATORS = tuple( reg.evaluator for reg in SIGNAL_REGISTRY if reg.flavour == "per_step" ) END_OF_RUN_EVALUATORS = tuple( reg.evaluator for reg in SIGNAL_REGISTRY if reg.flavour == "end_of_run" ) # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _to_numpy(arr: Any) -> np.ndarray: """Materialise a JAX device array to a NumPy host array. Used by :func:`build_artifacts` paths to pull the (small) linalg objects a fired signal wants the user to be able to dig into further. Idempotent for inputs that are already NumPy arrays. """ return np.asarray(arr) def _build_signal( *, name: str, specificity: Specificity, summary: str, detail: str, evidence: dict[str, float | int], threshold_ratio: float, suggestions: list[str], artifacts: Optional[dict[str, np.ndarray]] = None, offending_step: Optional[int] = None, ) -> Signal: """Construct a :class:`Signal` with derived ``magnitude`` / ``confidence`` from a ratio-to-threshold input. ``threshold_ratio`` is the largest ratio of any single evidence value to its documented threshold (always ``>= 1`` whenever the signal fires). Centralising the magnitude / confidence derivation here means each evaluator just computes the worst ratio and hands it off, instead of duplicating the lookup table. """ magnitude = magnitude_for(threshold_ratio) confidence = confidence_for(specificity, magnitude) return Signal( name=name, specificity=specificity, magnitude=magnitude, confidence=confidence, summary=summary, detail=detail, evidence=dict(evidence), suggestions=list(suggestions), artifacts=dict(artifacts or {}), offending_step=offending_step, ) def _just_crossed_below( summaries: list["StepSummary"], attr: str, threshold: float ) -> bool: """``True`` iff the most-recent summary has ``attr < threshold`` AND the previous summary did not (or there is no previous summary). Used to fire a signal exactly once on the *first* crossing so the report does not list the same signal at every step it remains tripped. """ if not summaries: return False # pragma: no cover cur = float(getattr(summaries[-1], attr)) if not (cur < threshold): return False if len(summaries) == 1: return True # pragma: no cover prev = float(getattr(summaries[-2], attr)) return not (prev < threshold) def _streak_just_reached( summaries: list["StepSummary"], predicate: Callable[["StepSummary"], bool], n: int, ) -> bool: """``True`` iff the last ``n`` summaries all satisfy ``predicate`` AND the immediately-prior summary did not (or did not exist). Centralises the consecutive-N pattern used by ``lbfgs_conditioning_extreme`` and any future streak signal so a signal fires once at the moment the streak first becomes long enough, not at every subsequent step. """ if len(summaries) < n: return False tail = summaries[-n:] if not all(predicate(s) for s in tail): return False if len(summaries) == n: return True # pragma: no cover return not predicate(summaries[-n - 1]) # --------------------------------------------------------------------------- # Per-step evaluators # --------------------------------------------------------------------------- # Threshold below which the cumulative low-water singular-value # estimate of ``J_eq`` is treated as a LICQ violation. Sourced from # the AGENTS.md "QP failure diagnostics" + ``min_projected_grad_norm`` # discussions: the projected-CG / CRAIG inner solvers start to suffer # from regularisation bias once ``sigma_min(J_eq) < 1e-8``. _RANK_DEF_THRESHOLD = 1e-8 def _eval_eq_jacobian_rank_deficient( ctx: EvalContext, state: "SLSQPState", summary: "StepSummary", summaries: list["StepSummary"], ) -> Optional[Signal]: """Per-step: equality Jacobian appears rank-deficient. Cheap predicate (scalar): the cumulative low-water mark ``eq_jac_min_sv_est`` carried on :attr:`SLSQPDiagnostics.eq_jac_min_sv_est` just dropped below :data:`_RANK_DEF_THRESHOLD` (= ``1e-8``) at this step. Because the diagnostics counter is a low-water mark rather than a per-step value, the "first crossing" predicate is the right way to fire only once per run. Build_artifacts (only invoked if the predicate fires): pulls ``state.eq_jac`` (``m_eq * n * 8`` bytes) to host, computes ``J J^T`` (``m_eq^2 * 8`` bytes — tiny), and the singular values of ``J_eq`` for the user to inspect. Specificity: ``specific``. The (sigma_min < 1e-8) signature essentially uniquely points at LICQ violation / projector instability. Hypothesis: the equality Jacobian is (near-)rank-deficient at the current iterate, which makes the null-space projector ``v - A^T (A A^T)^{-1} A v`` numerically unstable. See ``AGENTS.md`` "Cholesky-based projection" and the ``CraigInnerSolver`` notes for context. """ # Nothing to fire when there are no equality constraints. if int(state.eq_val.shape[0]) == 0: return None if not _just_crossed_below(summaries, "eq_jac_min_sv_est", _RANK_DEF_THRESHOLD): return None # ---- build_artifacts (live state in scope) ---- J_eq = _to_numpy(state.eq_jac) JJT = _to_numpy(state.eq_jac @ state.eq_jac.T) try: sv = _to_numpy(jnp.linalg.svd(state.eq_jac, compute_uv=False)) except Exception: # pragma: no cover -- defensive sv = np.linalg.svd(J_eq, compute_uv=False) sigma_min = float(np.min(sv)) sigma_max = float(np.max(sv)) threshold_ratio = _RANK_DEF_THRESHOLD / max(sigma_min, 1e-300) detail = ( "The equality Jacobian J_eq has min singular value " f"{sigma_min:.3e} (cumulative low-water " f"{summary.eq_jac_min_sv_est:.3e}). Below ~1e-8 the " "null-space projector v - J_eq^T (J_eq J_eq^T)^{-1} J_eq v " "is numerically unstable: the Cholesky regularisation " "epsilon * I starts to bias the projection, and CRAIG / " "MINRES-QLP can stall. This pattern is essentially " "diagnostic for a LICQ violation, but it can also surface " "when the equality constraints are well-posed analytically " "yet badly scaled." ) return _build_signal( name="eq_jacobian_rank_deficient", specificity="specific", summary=( f"J_eq looks suspicious because its smallest singular " f"value ({sigma_min:.3e}) is below 1e-8 — the null-space " "projector is numerically unstable here." ), detail=detail, evidence={ "sigma_min": sigma_min, "sigma_max": sigma_max, "threshold": _RANK_DEF_THRESHOLD, "step": summary.step_count, }, threshold_ratio=threshold_ratio, suggestions=[ "Check that the equality constraints are linearly " "independent at the iterate (LICQ).", "If the constraints are well-posed but badly scaled, " "rescale them so each row of J_eq has comparable norm.", "Try MinresQLPSolver (handles indefinite/singular saddle-" "point systems natively) or CraigInnerSolver (no " "epsilon * I regularisation in the projector).", ], artifacts={ "J_eq": J_eq, "JJT": JJT, "singular_values": sv, }, offending_step=int(summary.step_count), ) _LBFGS_KAPPA_THRESHOLD = 1e6 _LBFGS_KAPPA_STREAK = 3 # Single-step "burst" trigger: catches catastrophic one-shot blow-ups # that the streak gate misses (the L-BFGS reset chain typically clamps # back to identity within one or two steps after the spike, so the # streak never reaches its 3-step minimum). Two orders of magnitude # above ``_LBFGS_KAPPA_THRESHOLD`` so a routine "above-but-recovering" # kappa does not also fire the burst clause. _LBFGS_KAPPA_BURST = 1e8 def _eval_lbfgs_conditioning_extreme( ctx: EvalContext, state: "SLSQPState", summary: "StepSummary", summaries: list["StepSummary"], ) -> Optional[Signal]: """Per-step: L-BFGS B0 diagonal condition number is extreme. Cheap predicate (scalar): either of two clauses fires. 1. **Streak**: the L-BFGS diagonal condition-number proxy ``max_diag / min_diag`` (carried on :attr:`StepSummary.diag_kappa`) has been above :data:`_LBFGS_KAPPA_THRESHOLD` (= ``1e6``) for the last :data:`_LBFGS_KAPPA_STREAK` (= ``3``) consecutive steps. Three is the ``soft reset`` patience the L-BFGS reset chain uses; once the streak reaches that length the chain triggers a soft reset, so it is also the right point to flag the user. 2. **Burst**: the *current* step's ``diag_kappa`` is above :data:`_LBFGS_KAPPA_BURST` (= ``1e8``), which catches catastrophic one-shot blow-ups (e.g. ``kappa = 1e+09`` for a single step before the reset chain clamps back to identity). The streak gate cannot catch these because the post-spike identity reset breaks the streak after one step. Build_artifacts (only invoked if the predicate fires): pulls the full L-BFGS diagonal (``n * 8`` bytes — fits in L1 even for very large ``n``) plus the scalar gamma / history count. Specificity: ``ambiguous``. Extreme L-BFGS conditioning is consistent with several causes (stale curvature pairs, post- identity-reset skip lock, bad initial scaling), so the report cannot pin it on one without more evidence. """ burst_fired = float(summary.diag_kappa) > _LBFGS_KAPPA_BURST streak_fired = _streak_just_reached( summaries, lambda s: s.diag_kappa > _LBFGS_KAPPA_THRESHOLD, _LBFGS_KAPPA_STREAK, ) if not (streak_fired or burst_fired): return None mode = "burst" if burst_fired else "streak" diagonal = _to_numpy(state.lbfgs_history.diagonal) gamma = float(state.lbfgs_history.gamma) history_count = int(state.lbfgs_history.count) if burst_fired: threshold_ratio = float(summary.diag_kappa) / _LBFGS_KAPPA_BURST clause_summary = ( f"a single-step burst above {_LBFGS_KAPPA_BURST:.0e} " "(catastrophic one-shot blow-up)" ) else: threshold_ratio = float(summary.diag_kappa) / _LBFGS_KAPPA_THRESHOLD clause_summary = ( f"a streak of {_LBFGS_KAPPA_STREAK} consecutive iterations " f"above {_LBFGS_KAPPA_THRESHOLD:.0e}" ) detail = ( f"L-BFGS B0 diagonal condition number kappa(B0) = " f"{summary.diag_kappa:.3e} (min={summary.min_diag:.3e}, " f"max={summary.max_diag:.3e}) tripped the " f"{mode!r} clause: {clause_summary}. " "Extreme conditioning is consistent with several causes: " "(i) stale curvature pairs whose s/y vectors are no longer " "informative at the current iterate, (ii) the post-identity-" "reset skip lock the AGENTS.md describes (lowering " "``lbfgs_skip_floor`` from 1e-8 to 1e-12 was the fix), " "(iii) badly-scaled variables, and (iv) a fallback projected-" "gradient direction (after a QP-budget exhaustion) being " "appended to the L-BFGS history as if it were a Newton " "secant. Each cause has a different fix; check " "``state.lbfgs_history.count`` and the ``n_lbfgs_skips`` " "counter to disambiguate." ) return _build_signal( name="lbfgs_conditioning_extreme", specificity="ambiguous", summary=( f"The L-BFGS B0 diagonal looks suspicious because its " f"condition number is {summary.diag_kappa:.3e} ({mode} " f"clause: {clause_summary})." ), detail=detail, evidence={ "diag_kappa": summary.diag_kappa, "min_diag": summary.min_diag, "max_diag": summary.max_diag, "threshold": _LBFGS_KAPPA_THRESHOLD, "burst_threshold": _LBFGS_KAPPA_BURST, "streak": _LBFGS_KAPPA_STREAK, # ``burst_clause = 1`` ⇔ the single-step burst fired, # ``burst_clause = 0`` ⇔ the streak fired. Encoded as int # because :attr:`Signal.evidence` is typed numeric-only. "burst_clause": 1 if burst_fired else 0, "step": summary.step_count, }, threshold_ratio=threshold_ratio, suggestions=[ "Try ``use_exact_hvp_in_qp=True`` so the QP no longer " "depends on the L-BFGS approximation.", "If many curvature pairs are being skipped " "(``n_lbfgs_skips`` is large), inspect the secant pairs " "for noise.", "Consider rescaling the variables so coordinate-wise " "curvatures are within a few orders of magnitude.", ], artifacts={ "diagonal": diagonal, "gamma": np.asarray(gamma), "history_count": np.asarray(history_count), }, offending_step=int(summary.step_count), ) # --------------------------------------------------------------------------- # End-of-run evaluators # --------------------------------------------------------------------------- def _eval_multiplier_recovery_noise( ctx: EvalContext, final_state: "SLSQPState", summaries: list["StepSummary"], coarse_result: Any, ) -> Optional[Signal]: """End-of-run: noise-floor stationarity stall pattern. The textbook signature documented in ``AGENTS.md``: * ``min_projected_grad_norm / max(mu_max, 1) < rtol`` (the inner solver's noise-aware stationarity proxy already passed) * ``|grad_L| / max(mu_max, 1) > rtol`` (the classical stationarity proxy did not) * ``n_steps_inexact_below_classical / step_count > 0.5`` (more than half the iterations had a strictly-cleaner projected gradient than classical, i.e. multiplier-recovery noise was the limiting factor across the bulk of the run) All three together are essentially diagnostic for the noise-floor stationarity stall mode that ``HRInexactSTCG`` + ``use_inexact_stationarity=True`` is designed to rescue. Specificity: ``specific``. """ if not summaries: return None # pragma: no cover diag = final_state.diagnostics last = summaries[-1] kkt_scale = max(last.kkt_scale, 1.0) rtol = ctx.rtol classical_ratio = last.grad_lagrangian_norm / kkt_scale proj_ratio = float(diag.min_projected_grad_norm) / kkt_scale n_inexact_below = int(diag.n_steps_inexact_below_classical) fraction_below = n_inexact_below / max(last.step_count, 1) if not (proj_ratio < rtol): return None if not (classical_ratio > rtol): return None # pragma: no cover if not (fraction_below > 0.5): return None # pragma: no cover # Magnitude reflects how clean the projected ratio is relative to # rtol versus how stuck the classical ratio is. We use the # ratio of classical-to-projected; the further apart they are, # the more obvious the noise-floor diagnosis. threshold_ratio = classical_ratio / max(proj_ratio, 1e-300) detail = ( "The inner solver's projected-gradient norm low-water mark " f"satisfies ||W g|| / max(mu_max, 1) = {proj_ratio:.3e} < rtol " f"({rtol:.1e}), while the classical Lagrangian gradient " f"ratio ||grad_L|| / max(mu_max, 1) = {classical_ratio:.3e} is still " "above rtol. This is the textbook signature of multiplier-" "recovery noise contaminating the classical stationarity " 'test (see AGENTS.md "Inexact stationarity disjunct"). ' f"More than half the iterations ({n_inexact_below} / " f"{last.step_count}) had ||W g|| < ||grad_L||, confirming " "the pattern is persistent rather than a single-step " "artefact. HRInexactSTCG paired with " "``use_inexact_stationarity=True`` directly rescues this " "mode." ) return _build_signal( name="multiplier_recovery_noise", specificity="specific", summary=( f"The classical stationarity test ({classical_ratio:.3e}) " f"looks suspicious because the inner-solver projected-" f"gradient ratio ({proj_ratio:.3e}) already passed rtol " f"({rtol:.1e}) but the recovered Lagrangian gradient is " "noise-dominated." ), detail=detail, evidence={ "classical_ratio": classical_ratio, "projected_ratio": proj_ratio, "rtol": rtol, "n_steps_inexact_below_classical": n_inexact_below, "fraction_below": fraction_below, }, threshold_ratio=threshold_ratio, suggestions=[ "Switch the inner solver to HRInexactSTCG (e.g. " "``inner_solver=HRInexactSTCG(inner=ProjectedCGCholesky())``).", "Set ``use_inexact_stationarity=True`` so the convergence " "test admits the projected-gradient disjunct.", ], artifacts={}, offending_step=None, ) # Hard floor used as a fallback when the solver's LS budget cannot be # read off ``EvalContext`` (e.g. synthetic tests that use a stand-in # solver). ``2 ** -20 ~= 9.5e-7`` is the SciPy/SLSQP default; multiply # by 10 so a single backtracking step above the floor still triggers. _LS_COLLAPSE_FALLBACK_FLOOR = 10.0 * (2.0**-20) def _ls_floor_for(ctx: EvalContext) -> float: """Return ``10 * 2**-line_search.max_steps`` for the active solver. The backtracking line search halves ``alpha`` at most ``line_search.max_steps`` times before giving up, so the smallest accepted ``alpha`` on success is ``2**-max_steps``. We multiply by 10 so an LS that bottomed out within one backtracking step of the floor still trips the predicate (matches the ``magnitude_for`` bucketing of "marginal" = within 10x). Falls back to :data:`_LS_COLLAPSE_FALLBACK_FLOOR` (the SciPy default ``max_steps == 20``) when the solver instance does not expose the ``line_search_max_steps`` accessor — this lets the synthetic test suite use a minimal stand-in solver without pulling in the full :class:`SLSQP` class. """ solver = ctx.solver max_steps = getattr(solver, "line_search_max_steps", None) if not isinstance(max_steps, int) or max_steps <= 0: return _LS_COLLAPSE_FALLBACK_FLOOR return 10.0 * (2.0**-max_steps) def _eval_line_search_collapse( ctx: EvalContext, final_state: "SLSQPState", summaries: list["StepSummary"], coarse_result: Any, ) -> Optional[Signal]: """End-of-run: line search collapsed to a tiny step at termination. Cheap predicate: the cumulative ``tail_ls_failures`` counter is > 0 (the line search failed at the last iteration) and the minimum line-search alpha across the run was at or below the LS floor ``10 * 2**-line_search.max_steps`` (i.e. within one backtracking step of the smallest ``alpha`` the line search will ever accept). See :func:`_ls_floor_for` for the derivation — the previous hard-coded ``1e-10`` literal could never fire on the default ``LineSearchConfig.max_steps == 20`` solver because the LS floor itself is ``2**-20 ~= 9.5e-7``. Specificity: ``ambiguous`` — a stuck LS is consistent with a bad L-BFGS direction, a too-small merit penalty, or a non-descent QP step. Artifacts: the ``alpha`` trajectory near the offending step plus the final L-BFGS diagonal. The exact L-BFGS diagonal at the offending step is not preserved (that would require checkpointing every step); use :func:`capture_state_at_step` if you need it. """ if not summaries: return None # pragma: no cover diag = final_state.diagnostics tail_failures = int(diag.tail_ls_failures) alpha_min = float(diag.ls_alpha_min) ls_floor = _ls_floor_for(ctx) if not (tail_failures > 0 and alpha_min <= ls_floor): return None # Locate the offending step (first time alpha drops to the LS # floor while the LS was reporting failure). offending_step = None for s in summaries: if (not s.ls_success) and s.last_alpha <= ls_floor: offending_step = int(s.step_count) break if offending_step is None: offending_step = int(summaries[-1].step_count) # pragma: no cover # Trajectory window around the offending step (5 before + 5 after, # clipped) for the artifact. idx = max(0, offending_step - 1) window_lo = max(0, idx - 5) window_hi = min(len(summaries), idx + 6) alpha_window = np.asarray( [s.last_alpha for s in summaries[window_lo:window_hi]], dtype=float, ) final_diagonal = _to_numpy(final_state.lbfgs_history.diagonal) threshold_ratio = ls_floor / max(alpha_min, 1e-300) detail = ( f"The line search reached its smallest accepted alpha " f"({alpha_min:.3e}) at or below the LS floor " f"({ls_floor:.3e} = 10 * 2**-max_ls_steps) and exited with " f"{tail_failures} tail line-search failures. This is " "consistent with (i) a non-descent direction the QP returned " "(L-BFGS conditioning, stale curvature), (ii) a too-small " "merit penalty rho, or (iii) a step that violates the " "Armijo condition by overshooting active constraints. The " "alpha-trajectory window around the offending step is " "attached as an artifact. The L-BFGS diagonal at the " "*offending* step is not preserved by the diagnostics layer; " "call ``capture_state_at_step(..., step=" f"{offending_step})`` to recover it exactly." ) return _build_signal( name="line_search_collapse", specificity="ambiguous", summary=( f"The line search looks suspicious because alpha " f"collapsed to {alpha_min:.3e} (<= LS floor " f"{ls_floor:.3e}) and the run exited with " f"{tail_failures} tail line-search failures." ), detail=detail, evidence={ "ls_alpha_min": alpha_min, "tail_ls_failures": tail_failures, "ls_floor": ls_floor, }, threshold_ratio=threshold_ratio, suggestions=[ "Inspect the L-BFGS diagonal / gamma at the offending " "step (use ``capture_state_at_step``). If kappa(B0) is " "extreme, try ``use_exact_hvp_in_qp=True``.", "Try a larger initial merit penalty (raise the QP " "multipliers' bound or set a larger ``proximal_mu_max``).", "Try MinresQLPSolver: it solves the saddle-point KKT " "directly and is less sensitive to L-BFGS corruption.", ], artifacts={ "alpha_window": alpha_window, "lbfgs_diagonal_final": final_diagonal, }, offending_step=offending_step, ) def _eval_qp_budget_or_pingpong( ctx: EvalContext, final_state: "SLSQPState", summaries: list["StepSummary"], coarse_result: Any, ) -> Optional[Signal]: """End-of-run: QP active-set loop ran out of budget or ping-ponged. Specificity: ``generic``. Both events have many possible causes (LPEC-A over-prediction, degenerate vertex, coupled multipliers from the inner solver, etc). """ if not summaries: return None # pragma: no cover diag = final_state.diagnostics n_budget = int(diag.n_qp_budget_exhausted) n_pingpong = int(diag.n_qp_ping_pong) if not (n_budget > 0 or n_pingpong > 0): return None n_steps = max(len(summaries), 1) fraction_budget = n_budget / n_steps fraction_pingpong = n_pingpong / n_steps # The "threshold" for either is "1 occurrence per run" ie 1/n_steps; # ratio is just the count itself relative to a threshold of 1. threshold_ratio = max(n_budget, n_pingpong) / 1.0 detail = ( f"The QP active-set loop hit its iteration budget on " f"{n_budget} steps and short-circuited on a ping-pong on " f"{n_pingpong} steps (out of {n_steps} total iterations). " "Both events are usually downstream of one of: an LPEC-A " "over-prediction the trust gate failed to catch (raise " "``lpeca_trust_threshold`` or use " "``active_set_method='expand'``), a degenerate vertex with " "coupled constraints (raise ``mult_drop_floor``), or " "noise-contaminated multipliers from the inner solver " "(switch to MinresQLPSolver or HRInexactSTCG)." ) return _build_signal( name="qp_budget_or_pingpong", specificity="generic", summary=( f"The QP layer looks suspicious because it exhausted its " f"budget on {n_budget} steps and ping-ponged on " f"{n_pingpong} steps." ), detail=detail, evidence={ "n_qp_budget_exhausted": n_budget, "n_qp_ping_pong": n_pingpong, "fraction_budget": fraction_budget, "fraction_pingpong": fraction_pingpong, "n_steps": n_steps, }, threshold_ratio=threshold_ratio, suggestions=[ "Raise ``qp_max_iter`` to give the active-set loop more " "headroom (current default may be too tight for " "ill-conditioned problems).", "If LPEC-A is enabled, raise ``lpeca_trust_threshold`` " "or fall back to ``active_set_method='expand'``.", "Raise ``mult_drop_floor`` so noise-flipped multipliers " "do not spuriously drop active constraints.", ], artifacts={}, offending_step=None, ) def _eval_merit_oscillation( ctx: EvalContext, final_state: "SLSQPState", summaries: list["StepSummary"], coarse_result: Any, ) -> Optional[Signal]: """End-of-run: merit function regressed too often. Cheap predicate: ``n_merit_regressions > step_count / 10`` (more than 10 % of steps saw merit increase despite line-search success). Specificity: ``generic`` — merit oscillation can be a too-small ``rho``, a stale Hessian approximation, or both. """ if not summaries: return None # pragma: no cover diag = final_state.diagnostics n_regressions = int(diag.n_merit_regressions) n_steps = max(len(summaries), 1) threshold = max(n_steps // 10, 1) if not (n_regressions > threshold): return None threshold_ratio = n_regressions / threshold detail = ( f"The L1 merit function increased on {n_regressions} of " f"{n_steps} iterations despite the line search reporting " "success. The Han-Powell merit function is monotone-" "decreasing under a correctly-sized ``rho``; persistent " "regressions point at ``rho`` being too small (the penalty " "does not dominate the constraint violation) or at a stale " "L-BFGS approximation that produces non-descent QP steps." ) return _build_signal( name="merit_oscillation", specificity="generic", summary=( f"The merit function looks suspicious because it " f"regressed on {n_regressions} of {n_steps} iterations " f"(threshold = step_count / 10 = {threshold})." ), detail=detail, evidence={ "n_merit_regressions": n_regressions, "n_steps": n_steps, "threshold": threshold, }, threshold_ratio=threshold_ratio, suggestions=[ "Inspect the merit penalty ``rho`` trajectory; if it " "stays at the initial value, the penalty update is " "starved of evidence.", "Try ``use_exact_hvp_in_qp=True`` so the QP step " "reflects the actual curvature of the Lagrangian.", ], artifacts={}, offending_step=None, ) def _eval_lpeca_overpredicting( ctx: EvalContext, final_state: "SLSQPState", summaries: list["StepSummary"], coarse_result: Any, ) -> Optional[Signal]: """End-of-run: LPEC-A over-predicted on > 50 % of steps OR was bypassed every step. Specificity: ``ambiguous`` — over-prediction can mean LPEC-A's rho_bar is too pessimistic (rare) or that the iterate is far from KKT (the warm-up gate should already cover this), so the signal is informative but not pinpoint. """ if not summaries: return None # pragma: no cover diag = final_state.diagnostics n_capped = int(diag.n_lpeca_capped) n_bypassed = int(diag.n_lpeca_bypassed) n_steps = max(len(summaries), 1) fraction_capped = n_capped / n_steps bypassed_all = n_bypassed == n_steps and n_steps > 0 if not (fraction_capped > 0.5 or bypassed_all): return None if bypassed_all: threshold_ratio = float(n_bypassed) / 1.0 # pragma: no cover else: threshold_ratio = fraction_capped / 0.5 detail = ( f"LPEC-A's rank-aware size cap truncated the prediction on " f"{n_capped} of {n_steps} iterations and the prediction was " f"bypassed on {n_bypassed} iterations. Persistent capping " "means rho_bar (the LPEC-A proximity measure) is letting " "too many constraints into the predicted active set, which " "in turn forces the cap to discard them. Persistent " "bypassing means the trust gate vetoed the prediction every " "step, in which case LPEC-A is contributing nothing. Both " "imply the EXPAND fallback is doing all the work." ) return _build_signal( name="lpeca_overpredicting", specificity="ambiguous", summary=( f"LPEC-A looks suspicious because it was capped on " f"{n_capped} steps and bypassed on {n_bypassed} steps " f"(out of {n_steps})." ), detail=detail, evidence={ "n_lpeca_capped": n_capped, "n_lpeca_bypassed": n_bypassed, "n_steps": n_steps, "fraction_capped": fraction_capped, }, threshold_ratio=threshold_ratio, suggestions=[ "Disable LPEC-A by setting " "``active_set_method='expand'`` and re-run; if " "convergence improves, LPEC-A was hurting more than " "helping on this problem.", "Raise ``lpeca_trust_threshold`` so the predictor is " "trusted more often (only safe when rho_bar is a " "reliable proxy near the optimum).", "Raise ``lpeca_warmup_steps`` so LPEC-A waits longer before contributing.", ], artifacts={}, offending_step=None, ) def _eval_infeasible_termination( ctx: EvalContext, final_state: "SLSQPState", summaries: list["StepSummary"], coarse_result: Any, ) -> Optional[Signal]: """End-of-run: the solver terminated at an infeasible iterate. Fires when either (a) the granular ``state.termination_code`` is one of {``infeasible``, ``nonfinite``}, or (b) primal feasibility was never satisfied across the run (max constraint violation above ``atol`` on every recorded summary). Specificity: ``specific``. Artifacts: indices and signed values of the worst-violated equality + inequality constraints at the final iterate. """ if not summaries: return None # pragma: no cover code = final_state.termination_code name = _result_name_safe(code) code_match = name in {"infeasible", "nonfinite"} last = summaries[-1] atol = ctx.atol feasibility_ever_satisfied = any( s.max_eq_violation <= atol and s.max_ineq_violation <= atol for s in summaries ) persistent_infeasibility = not feasibility_ever_satisfied and ( last.max_eq_violation > atol or last.max_ineq_violation > atol ) if not (code_match or persistent_infeasibility): return None eq_val = _to_numpy(final_state.eq_val) ineq_val = _to_numpy(final_state.ineq_val) # Worst-violated indices (top-5). eq_worst_idx = ( np.argsort(np.abs(eq_val))[::-1][:5] if eq_val.size > 0 else np.array([], dtype=int) ) ineq_violations = np.maximum(0.0, -ineq_val) if ineq_val.size > 0 else np.array([]) ineq_worst_idx = ( np.argsort(ineq_violations)[::-1][:5] if ineq_violations.size > 0 else np.array([], dtype=int) ) worst_violation = max(last.max_eq_violation, last.max_ineq_violation) threshold_ratio = worst_violation / max(atol, 1e-300) detail = ( f"The run terminated at a primally infeasible iterate " f"(termination code = {name!r}, max|c_eq| = " f"{last.max_eq_violation:.3e}, max(0, -c_ineq) = " f"{last.max_ineq_violation:.3e}, atol = {atol:.1e}). " "Either the constraints are mutually infeasible (no point " "satisfies all of them simultaneously) or the active-set " "machinery could not satisfy them within ``atol`` from the " "supplied initial point. The worst-violated constraint " "indices and signed values are attached as artifacts." ) return _build_signal( name="infeasible_termination", specificity="specific", summary=( f"Primal feasibility looks suspicious because the run " f"ended with max|c_eq|={last.max_eq_violation:.3e} and " f"max(0,-c_ineq)={last.max_ineq_violation:.3e} " f"(atol={atol:.1e})." ), detail=detail, evidence={ "max_eq_violation": last.max_eq_violation, "max_ineq_violation": last.max_ineq_violation, "atol": atol, }, threshold_ratio=threshold_ratio, suggestions=[ "Print the worst-violated constraints (see artifacts) " "and verify they admit a common feasible point.", "Provide a feasible initial iterate, or relax the " "constraints incrementally to find a feasibility " "boundary.", "If ``c_eq`` involves a divide-by-near-zero or other " "non-Lipschitz expression, consider rewriting the " "constraint to avoid the singularity.", ], artifacts={ "eq_values": eq_val, "ineq_values": ineq_val, "eq_worst_indices": np.asarray(eq_worst_idx, dtype=int), "ineq_worst_indices": np.asarray(ineq_worst_idx, dtype=int), }, offending_step=None, ) # Default ``divergence_patience`` from :class:`SLSQPConfig` (3). The # magnitude of :func:`_eval_divergence_rollback_triggered` is # normalised by this value so a run that triggered exactly one # rollback fires at "marginal" magnitude and a run that triggered the # rollback ten times fires at "moderate" / "extreme". _DIVERGENCE_PATIENCE_DEFAULT = 3 def _eval_divergence_rollback_triggered( ctx: EvalContext, final_state: "SLSQPState", summaries: list["StepSummary"], coarse_result: Any, ) -> Optional[Signal]: """End-of-run: best-iterate divergence rollback fired during the run. Cheap predicate: ``final_state.diagnostics.divergence_triggered`` is ``True`` (the rollback latched at least once) OR ``n_divergence_blowups > 0`` (a blow-up was observed even if patience was not yet reached). The rollback is the "best-iterate-rescue" guardrail described in ``AGENTS.md`` "Best-iterate divergence rollback"; if it fires, the run hit a catastrophic merit blow-up and the iterate the driver returned is ``state.best_x``, not the attempted final iterate. Specificity: ``specific``. This datum is essentially diagnostic for "the run blew up and was rescued by rollback" — no other failure mode flips ``divergence_triggered``. """ if not summaries: return None # pragma: no cover diag = final_state.diagnostics triggered = bool(diag.divergence_triggered) n_blowups = int(diag.n_divergence_blowups) if not (triggered or n_blowups > 0): return None last = summaries[-1] # Locate the offending step: first ``StepSummary`` recording a # blow-up event (``blowup_count`` > 0 immediately after a step # whose ``blowup_count`` was 0, or simply the first step with # ``diverging`` set). Falls back to the final summary when no # such step is found. offending_step: Optional[int] = None prev_blowup = 0 for s in summaries: if s.diverging: offending_step = int(s.step_count) break if s.blowup_count > prev_blowup: offending_step = int(s.step_count) break prev_blowup = int(s.blowup_count) if offending_step is None: offending_step = int(last.step_count) # pragma: no cover threshold_ratio = max(float(n_blowups), 1.0) / float(_DIVERGENCE_PATIENCE_DEFAULT) detail = ( f"The best-iterate divergence rollback latched (triggered = " f"{triggered}, n_divergence_blowups = {n_blowups}). This " "means the merit function suffered a catastrophic blow-up " "(``merit_new - best_merit > divergence_factor * " "max(|best_merit|, 1)`` for ``divergence_patience`` " "consecutive steps; defaults are ``10.0`` and ``3``) and the " "iterate the driver returned was overwritten with " "``state.best_x``. The L-BFGS history, multipliers and " "gradients still reflect the *attempted* step, so the " "report's 'Final iterate metrics' section may show post-" "blow-up values that do not correspond to the actually-" "returned iterate. See ``AGENTS.md`` 'Best-iterate " "divergence rollback'. The first blow-up was at step " f"{offending_step}; inspect the trajectory around there for " "the failure mechanism (typically L-BFGS poisoning + " "merit-penalty over-correction)." ) return _build_signal( name="divergence_rollback_triggered", specificity="specific", summary=( "The best-iterate divergence rollback looks suspicious " f"because it latched (triggered={triggered}) after " f"{n_blowups} blow-up event(s); the returned iterate is " "``best_x``, not the attempted final iterate." ), detail=detail, evidence={ "divergence_triggered": 1 if triggered else 0, "n_divergence_blowups": n_blowups, "patience_default": _DIVERGENCE_PATIENCE_DEFAULT, "offending_step": offending_step, }, threshold_ratio=threshold_ratio, suggestions=[ "Inspect the trajectory around the offending step " "(use ``capture_state_at_step``) for an L-BFGS diagonal " "blow-up paired with a merit-penalty jump — the usual " "cascade is QP-budget exhaustion → noisy curvature pair " "appended → kappa(B0) explosion → unrecoverable QP " "direction → merit blow-up → rollback.", "Try ``use_exact_hvp_in_qp=True`` so the QP no longer " "depends on the L-BFGS approximation (the most common " "way to break the cascade).", "Lower ``divergence_patience`` so the rollback fires " "earlier, before the post-blow-up state corrupts other " "quantities the report surfaces.", ], artifacts={}, offending_step=offending_step, ) # Total-trajectory ratio threshold: ``rho`` grew by 6+ orders of # magnitude across the run. The Han-Powell penalty update is meant # to grow ``rho`` monotonically by small factors per step; a 1e6 # total span is a strong indicator of an over-correction cycle. _RHO_EXPLOSION_TOTAL_RATIO = 1e6 # Per-step ratio threshold: ``rho`` jumped by 1000x or more in a # single step. Either threshold tripping is enough. _RHO_EXPLOSION_STEP_RATIO = 1e3 def _eval_merit_penalty_explosion( ctx: EvalContext, final_state: "SLSQPState", summaries: list["StepSummary"], coarse_result: Any, ) -> Optional[Signal]: """End-of-run: merit penalty ``rho`` exploded across the trajectory. Cheap predicate: either ``max(rho) / max(min(rho), 1) > _RHO_EXPLOSION_TOTAL_RATIO`` (the trajectory-wide span exceeds 1e6) OR ``max(rho[i] / max(rho[i-1], 1)) > _RHO_EXPLOSION_STEP_RATIO`` (a single-step jump exceeds 1e3). Either pattern signals that the Han-Powell penalty update over-corrected, usually because the QP direction was poisoned by a degenerate inner solve and the merit function's predicted reduction stopped agreeing with the realised one. Specificity: ``specific``. ``rho`` is monotone-non-decreasing by construction, and the update cadence is bounded by problem geometry; a 6-orders-of-magnitude span across the run or a 1000x jump in one step has a narrow set of causes (L-BFGS poisoning + merit catch-up is the dominant one). """ if not summaries: return None # pragma: no cover rhos = [max(float(s.merit_penalty), 1.0) for s in summaries] min_rho = min(rhos) max_rho = max(rhos) ratio_total = max_rho / min_rho if len(rhos) > 1: per_step_ratios = [rhos[i] / rhos[i - 1] for i in range(1, len(rhos))] ratio_step = max(per_step_ratios) # Step where the largest single-step jump happened (1-indexed # to match ``StepSummary.step_count`` semantics). jump_idx_zero = max(range(1, len(rhos)), key=lambda i: rhos[i] / rhos[i - 1]) jump_step = int(summaries[jump_idx_zero].step_count) else: ratio_step = 1.0 jump_step = int(summaries[-1].step_count) total_fired = ratio_total > _RHO_EXPLOSION_TOTAL_RATIO step_fired = ratio_step > _RHO_EXPLOSION_STEP_RATIO if not (total_fired or step_fired): return None threshold_ratio = max( ratio_total / _RHO_EXPLOSION_TOTAL_RATIO, ratio_step / _RHO_EXPLOSION_STEP_RATIO, ) detail = ( "The merit penalty ``rho`` trajectory exploded across the " f"run: span ratio ``max(rho) / max(min(rho), 1) = " f"{ratio_total:.3e}`` (threshold " f"{_RHO_EXPLOSION_TOTAL_RATIO:.0e}), largest single-step " f"jump ``{ratio_step:.3e}`` (threshold " f"{_RHO_EXPLOSION_STEP_RATIO:.0e}, at step {jump_step}). " "The Han-Powell update grows ``rho`` monotonically by " "small factors per step; a span this large is the " "merit function trying to catch up after a QP step poisoned " "the predicted-vs-realised merit reduction (usually because " "an L-BFGS pair from a fallback projected-gradient direction " "was appended to the curvature buffer and corrupted the " "subsequent Newton direction). Once ``rho`` is over-sized, " "every subsequent line search has to absorb a much larger " "merit derivative and typically collapses to the LS floor." ) return _build_signal( name="merit_penalty_explosion", specificity="specific", summary=( "The merit penalty ``rho`` looks suspicious because it " f"spanned {ratio_total:.3e}x across the run " f"(min={min_rho:.3e}, max={max_rho:.3e}; largest " f"single-step jump {ratio_step:.3e}x at step {jump_step})." ), detail=detail, evidence={ "rho_min": min_rho, "rho_max": max_rho, "rho_ratio_total": ratio_total, "rho_ratio_step": ratio_step, "rho_threshold_total": _RHO_EXPLOSION_TOTAL_RATIO, "rho_threshold_step": _RHO_EXPLOSION_STEP_RATIO, "jump_step": jump_step, }, threshold_ratio=threshold_ratio, suggestions=[ "Try ``use_exact_hvp_in_qp=True`` so the QP no longer " "depends on the L-BFGS approximation; the merit-penalty " "over-correction is almost always downstream of an " "L-BFGS poisoning event.", "Raise ``qp_max_iter`` so QP-budget exhaustion (which " "forces the projected-gradient fallback that poisons the " "L-BFGS history) is less likely.", "Cap the merit penalty growth by passing a smaller " "``proximal_mu_max`` if the equality constraints are " "well-scaled; the ceiling prevents runaway ``rho``.", "If the run was launched with ``auto_scale=False`` (or via " "a path that bypasses :func:`slsqp_jax.minimize_like_scipy`), " "enable auto-scaling -- a runaway ``rho`` is almost always " "the merit function trying to reconcile mismatched objective " "and constraint magnitudes. The Auto-scaling section of " "the report (when present) lists the factors that were " "applied.", "The new ``auto_scale=True`` (uniform mode, default) uses a " "single shared scalar across all constraint rows -- it " "prevents the cascade without flattening the relative row " "magnitudes. If your constraints have intentionally " "heterogeneous magnitudes (e.g. a top-level budget plus " "smaller sub-budgets) this is the right mode. If a single " "constraint row has *vastly* different gradient magnitude " "from the rest, ``auto_scale='balanced'`` (per-row, the " "old default) flattens those rows individually and may " "rescue the run. ``auto_scale='aggressive'`` raises the " "amplification ceiling for very small-gradient problems.", ], artifacts={ "rho_trajectory": np.asarray(rhos, dtype=float), }, offending_step=jump_step if step_fired else None, ) # ``rho`` is treated as "frozen" for the prefix when its span is # below this multiplicative tolerance. ``rho`` is mutated by an # additive-then-clip update in the solver, so a true "did not move" # signal needs a tiny epsilon rather than equality testing. _RHO_FROZEN_TOL = 1.0 + 1e-9 # Minimum violation growth that constitutes "drift": the worst # infeasibility at the end of the prefix must be at least this # multiple of the worst infeasibility at the start. _STARVATION_VIOLATION_GROWTH = 5.0 # The frozen-prefix must span at least this many steps (and at # least 5 absolute) to fire. Short prefixes are noise. _STARVATION_PREFIX_FRACTION = 1.0 / 3.0 def _eval_penalty_starvation( ctx: EvalContext, final_state: "SLSQPState", summaries: list["StepSummary"], coarse_result: Any, ) -> Optional[Signal]: """End-of-run: ``rho`` stayed at its initial value while feasibility drifted. Cheap predicate: the longest prefix of summaries during which ``rho`` is constant (within :data:`_RHO_FROZEN_TOL`) spans at least ``max(5, n_steps // 3)`` steps, ``max_eq_violation`` is monotone non-decreasing across that prefix, and grew by at least :data:`_STARVATION_VIOLATION_GROWTH` (= 5x) from the first to the last step of the prefix. Catches the "started feasible, drifted off, ``rho`` never grew" pathology described in the diagnostic notes for the feasible- start divergence run: the Han-Powell directional-derivative test cannot trigger a ``rho`` increase when the ``f`` reduction alone satisfies it, so feasibility can decay silently for many steps. Specificity: ``specific``. """ if not summaries: return None # pragma: no cover n_steps = len(summaries) min_prefix = max(5, n_steps // 3) if n_steps < min_prefix: return None # Identify the longest leading prefix where ``rho`` is frozen. rho0 = max(float(summaries[0].merit_penalty), 1.0) prefix_end = 0 for i, s in enumerate(summaries): rho_i = max(float(s.merit_penalty), 1.0) ratio = max(rho_i / rho0, rho0 / rho_i) if ratio > _RHO_FROZEN_TOL: break prefix_end = i prefix_len = prefix_end + 1 if prefix_len < min_prefix: return None prefix = summaries[: prefix_end + 1] eq_violations = [float(s.max_eq_violation) for s in prefix] ineq_violations = [float(s.max_ineq_violation) for s in prefix] # Treat eq + ineq as a unified "infeasibility" proxy so the test # works for both equality- and inequality-only problems. violations = [max(e, i) for e, i in zip(eq_violations, ineq_violations)] # Monotone non-decreasing test (with a small tolerance to # absorb numerical jitter at the floor). monotone = all( violations[i] >= violations[i - 1] - 1e-15 for i in range(1, len(violations)) ) if not monotone: return None start_violation = violations[0] end_violation = violations[-1] # Need a non-zero starting violation to avoid 0/0; if it is at # the floor, require an absolute growth above ``atol`` instead. atol = ctx.atol if start_violation > 0 and start_violation > atol: growth_ratio = end_violation / start_violation else: # Compare against ``atol`` so a near-zero start that grew to # ``5 * atol`` qualifies (the canonical feasible-start case). growth_ratio = end_violation / max(atol, 1e-300) if growth_ratio < _STARVATION_VIOLATION_GROWTH: return None threshold_ratio = growth_ratio / _STARVATION_VIOLATION_GROWTH detail = ( f"The merit penalty ``rho`` stayed constant at " f"{rho0:.3e} for the first {prefix_len} of {n_steps} " "iterations while the worst infeasibility grew " f"monotonically from {start_violation:.3e} to " f"{end_violation:.3e}. This is the textbook " "'penalty-starved feasibility drift' pattern: the " "Han-Powell directional-derivative test cannot trigger a " "``rho`` increase when the ``f`` reduction alone satisfies " "the predicted-vs-realised inequality, so the merit " "function happily accepts steps that improve ``f`` at the " "cost of feasibility. When feasibility eventually decays " "enough to require a real ``rho`` correction, the catch-up " "is usually huge and downstream poisons the L-BFGS history " "(see ``merit_penalty_explosion`` and " "``divergence_rollback_triggered`` for the typical " "downstream signals). See ``AGENTS.md`` 'L1 merit " "directional derivative' for the update rule." ) return _build_signal( name="penalty_starvation", specificity="specific", summary=( "The merit penalty ``rho`` looks suspicious because it " f"stayed constant at {rho0:.3e} for {prefix_len} of " f"{n_steps} steps while max-infeasibility grew " f"{growth_ratio:.1f}x (from {start_violation:.3e} to " f"{end_violation:.3e})." ), detail=detail, evidence={ "rho_initial": rho0, "frozen_prefix_steps": prefix_len, "n_steps": n_steps, "violation_start": start_violation, "violation_end": end_violation, "growth_ratio": growth_ratio, "growth_threshold": _STARVATION_VIOLATION_GROWTH, }, threshold_ratio=threshold_ratio, suggestions=[ "If the supplied initial iterate is exactly feasible, " "perturb it slightly (e.g. ``x0 + atol * sign_vector``) " "so the merit-penalty update mechanism warms ``rho`` up " "before feasibility drifts.", "Bump the initial ``merit_penalty`` so the feasibility " "term has weight from step 1.", "Check the magnitude balance between the constraint " "Jacobian and the objective gradient: if " "``eq_jac_min_sv_est`` (in the diagnostics block) is " "much larger than ``||grad_f||``, the constraint is " "much steeper than the objective and the merit " "function cannot reconcile their natural scales without " "a huge ``rho``. Rescaling the constraint (e.g. drop " "the ``1/target`` division if the constraint is " "``(h(x) - target) / target``) so ``||J_eq||`` is " "comparable to ``||grad_f||`` is the durable fix.", "If LPEC-A is enabled, raise ``lpeca_warmup_steps`` so " "the predictor does not contaminate the early-iteration " "active set on a near-feasible iterate.", "If the run was launched with ``auto_scale=False`` (or via " "a path that bypasses :func:`slsqp_jax.minimize_like_scipy`), " "enable auto-scaling -- the ``||J_eq|| >> ||grad_f||`` " "magnitude mismatch that triggers penalty starvation is " "exactly what gradient-based scaling is designed to fix. " "The Auto-scaling section of the report (when present) " "lists the factors that were applied.", "The new ``auto_scale=True`` (uniform mode, default) " "preserves the relative row magnitudes between constraints " "while bringing the constraint Jacobian into the same " "range as the objective gradient -- the right choice when " "your constraints encode meaningful inter-row structure " "(e.g. a top-level budget plus smaller sub-budgets). If " "*one* constraint row has vastly different gradient " "magnitude from the rest and that's not a meaningful " "spread, ``auto_scale='balanced'`` (per-row, the old " "default) flattens those rows individually and may rescue " "the run. ``auto_scale='aggressive'`` raises the " "amplification ceiling.", ], artifacts={ "rho_prefix": np.asarray( [float(s.merit_penalty) for s in prefix], dtype=float ), "violations_prefix": np.asarray(violations, dtype=float), }, offending_step=int(prefix[-1].step_count), ) def _result_name_safe(result: Any) -> str: """Wrap :func:`slsqp_jax.diagnostics.report._result_name` for use here. Imported lazily to avoid a circular import: ``signals`` is imported by ``__init__`` before ``report``. """ from slsqp_jax.diagnostics.report import _result_name return _result_name(result) # --------------------------------------------------------------------------- # Registry installation # --------------------------------------------------------------------------- # Register all 8 starter signals at import time so the runner picks # them up by default. Test code can re-register an alternative # evaluator under the same ``name`` to shift thresholds without # editing this module. register_evaluator( "eq_jacobian_rank_deficient", specificity="specific", flavour="per_step", evaluator=_eval_eq_jacobian_rank_deficient, ) register_evaluator( "lbfgs_conditioning_extreme", specificity="ambiguous", flavour="per_step", evaluator=_eval_lbfgs_conditioning_extreme, ) register_evaluator( "multiplier_recovery_noise", specificity="specific", flavour="end_of_run", evaluator=_eval_multiplier_recovery_noise, ) register_evaluator( "line_search_collapse", specificity="ambiguous", flavour="end_of_run", evaluator=_eval_line_search_collapse, ) register_evaluator( "qp_budget_or_pingpong", specificity="generic", flavour="end_of_run", evaluator=_eval_qp_budget_or_pingpong, ) register_evaluator( "merit_oscillation", specificity="generic", flavour="end_of_run", evaluator=_eval_merit_oscillation, ) register_evaluator( "lpeca_overpredicting", specificity="ambiguous", flavour="end_of_run", evaluator=_eval_lpeca_overpredicting, ) register_evaluator( "infeasible_termination", specificity="specific", flavour="end_of_run", evaluator=_eval_infeasible_termination, ) register_evaluator( "divergence_rollback_triggered", specificity="specific", flavour="end_of_run", evaluator=_eval_divergence_rollback_triggered, ) register_evaluator( "merit_penalty_explosion", specificity="specific", flavour="end_of_run", evaluator=_eval_merit_penalty_explosion, ) register_evaluator( "penalty_starvation", specificity="specific", flavour="end_of_run", evaluator=_eval_penalty_starvation, ) __all__ = [ "Confidence", "EndOfRunEvaluator", "END_OF_RUN_EVALUATORS", "EvalContext", "Magnitude", "PER_STEP_EVALUATORS", "PerStepEvaluator", "SIGNAL_REGISTRY", "Signal", "SignalRegistration", "Specificity", "register_evaluator", ]