"""Signal evaluators and the registry that wires them in.
A signal is the diagnostics layer's atomic unit of evidence: an
evaluator function decides at fire time whether a known suspicious
pattern is present, and (when it fires) immediately builds the
linalg artifacts it needs from the live state so the report renderer
can surface them without re-running the solver.
The plan distinguishes two flavours of evaluator:
* **Per-step evaluators** receive ``(state, summary, summaries)`` and
are called inside the runner's iteration loop while the live
``SLSQPState`` is in scope. Each evaluator splits into a cheap
predicate (``cheap_predicate(summary) -> bool``) that runs every
step from host scalars only, and an expensive
``build_artifacts(state) -> dict`` that pulls device arrays only
when the predicate fires.
* **End-of-run evaluators** receive
``(final_state, summaries, coarse_result)`` and run once after the
loop exits. They never see a non-final ``SLSQPState``; trajectory
inspection happens against the ``StepSummary`` list instead.
Both flavours are registered into :data:`PER_STEP_EVALUATORS` /
:data:`END_OF_RUN_EVALUATORS` (Phase 2 fills them in; Phase 1 ships
empty so the runner has the same call surface from day one).
"""
from __future__ import annotations
from collections.abc import Callable
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Literal, Optional
import jax.numpy as jnp
import numpy as np
from slsqp_jax.diagnostics.playbook import confidence_for, magnitude_for
if TYPE_CHECKING:
from slsqp_jax.diagnostics.records import StepSummary
from slsqp_jax.slsqp import SLSQP
from slsqp_jax.state import SLSQPState
Specificity = Literal["specific", "ambiguous", "generic"]
Magnitude = Literal["marginal", "moderate", "extreme"]
Confidence = Literal["low", "medium", "high"]
@dataclass(frozen=True)
class EvalContext:
"""Bundle of solver-derived constants that evaluators need.
Threaded through the runner so per-step + end-of-run evaluators
can read the user's :attr:`rtol` / :attr:`atol` (e.g. for the
``multiplier_recovery_noise`` test which compares against
``rtol * max(mu_max, 1)`` — see
:func:`slsqp_jax.slsqp.termination.compute_mu_max`) without
taking a stateful dependency on the solver instance.
Attributes:
solver: The :class:`slsqp_jax.SLSQP` instance the run used.
Carried because some artifacts (e.g. the initial L-BFGS
memory size) live on the solver, not the state.
rtol: Relative-stationarity tolerance used by ``terminate()``.
atol: Primal-feasibility tolerance used by ``terminate()``.
max_steps: Maximum number of SQP iterations the manual loop
was allowed to run.
"""
solver: "SLSQP"
rtol: float
atol: float
max_steps: int
[docs]
@dataclass(frozen=True)
class Signal:
"""Single evidence-backed hypothesis emitted by an evaluator.
The wording contract is *hypothesis with evidence*, not verdict.
``summary`` reads "X looks suspicious because Y"; ``detail``
expands on the mechanism; ``suggestions`` are starting points for
the user to investigate, not prescriptions to apply blindly.
Attributes:
name: Stable machine-readable identifier (used for the
playbook scope filter and the cap-policy enforcement
test).
specificity: How uniquely this pattern points at one cause.
``specific`` = essentially diagnostic for one cause;
``ambiguous`` = consistent with two or three named causes;
``generic`` = consistent with many causes. Static
editorial tag set at registration.
magnitude: How far past threshold the evidence landed.
``marginal`` = within 10x of threshold;
``moderate`` = 10x-100x past threshold;
``extreme`` = >100x past threshold. Computed from
``evidence`` at fire time.
confidence: Derived from ``(specificity, magnitude)`` via the
lookup table in :func:`slsqp_jax.diagnostics.playbook.confidence_for`.
The single ranking axis the report sorts by.
summary: One-line "X looks suspicious because Y" framing.
detail: Multi-paragraph mechanism explanation.
evidence: Numeric evidence keyed by short name. Each value
is the actual measurement; the threshold is documented in
the evaluator's docstring.
suggestions: Concrete starting-point fixes (config knobs,
alternative inner solvers, etc).
artifacts: Heavy linalg objects the evaluator computed
immediately from the live state when it fired. May be
empty.
offending_step: 1-indexed step the signal fired on, or ``None``
for end-of-run evaluators that scope over the whole run.
"""
name: str
specificity: Specificity
magnitude: Magnitude
confidence: Confidence
summary: str
detail: str
evidence: dict[str, float | int] = field(default_factory=dict)
suggestions: list[str] = field(default_factory=list)
artifacts: dict[str, np.ndarray] = field(default_factory=dict)
offending_step: Optional[int] = None
# The two flavours of evaluator callable signature. Kept as bare
# Callable aliases (rather than typing.Protocol) so subclassing /
# duck-typing in tests is trivial. Both flavours take the
# :class:`EvalContext` as the first positional arg so they can read
# tolerances / solver config without a separate import.
PerStepEvaluator = Callable[
[EvalContext, "SLSQPState", "StepSummary", list["StepSummary"]],
Optional[Signal],
]
EndOfRunEvaluator = Callable[
[EvalContext, "SLSQPState", list["StepSummary"], Any],
Optional[Signal],
]
@dataclass(frozen=True)
class SignalRegistration:
"""Static registration record for a signal evaluator.
The ``cap-policy`` enforcement test
(``tests/diagnostics/test_registry.py``) iterates the registries
of :data:`PER_STEP_EVALUATORS` / :data:`END_OF_RUN_EVALUATORS`
and asserts that for every ``name`` here there exists a
``test_signal_<name>_synthetic`` and ``test_signal_<name>_integration``
in the diagnostics test module. ``specificity`` is captured here
so it stays adjacent to the evaluator and the docstring (single
source of truth).
"""
name: str
specificity: Specificity
evaluator: Callable
flavour: Literal["per_step", "end_of_run"]
# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------
# Phase 1 ships empty registries; Phase 2 populates them via
# :func:`register_evaluator`. These are the canonical lists the
# runner walks each iteration / at end-of-run.
PER_STEP_EVALUATORS: tuple[PerStepEvaluator, ...] = ()
END_OF_RUN_EVALUATORS: tuple[EndOfRunEvaluator, ...] = ()
# Parallel registry of the static metadata for each evaluator. Kept
# as a list (not a tuple) so :func:`register_evaluator` can append in
# place without reconstructing the public per-step / end-of-run
# tuples on every call.
SIGNAL_REGISTRY: list[SignalRegistration] = []
def register_evaluator(
name: str,
*,
specificity: Specificity,
flavour: Literal["per_step", "end_of_run"],
evaluator: Callable,
) -> None:
"""Append an evaluator to the appropriate registry.
Phase 2 calls this at module import time for each starter signal.
The function is idempotent on ``name`` (re-registering the same
name overwrites the previous record), which simplifies tests
that exercise alternative thresholds.
"""
global PER_STEP_EVALUATORS, END_OF_RUN_EVALUATORS
# Replace any prior registration with the same name.
for i, reg in enumerate(SIGNAL_REGISTRY):
if reg.name == name: # pragma: no cover
SIGNAL_REGISTRY.pop(i)
break
SIGNAL_REGISTRY.append(
SignalRegistration(
name=name,
specificity=specificity,
evaluator=evaluator,
flavour=flavour,
)
)
PER_STEP_EVALUATORS = tuple(
reg.evaluator for reg in SIGNAL_REGISTRY if reg.flavour == "per_step"
)
END_OF_RUN_EVALUATORS = tuple(
reg.evaluator for reg in SIGNAL_REGISTRY if reg.flavour == "end_of_run"
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _to_numpy(arr: Any) -> np.ndarray:
"""Materialise a JAX device array to a NumPy host array.
Used by :func:`build_artifacts` paths to pull the (small) linalg
objects a fired signal wants the user to be able to dig into
further. Idempotent for inputs that are already NumPy arrays.
"""
return np.asarray(arr)
def _build_signal(
*,
name: str,
specificity: Specificity,
summary: str,
detail: str,
evidence: dict[str, float | int],
threshold_ratio: float,
suggestions: list[str],
artifacts: Optional[dict[str, np.ndarray]] = None,
offending_step: Optional[int] = None,
) -> Signal:
"""Construct a :class:`Signal` with derived ``magnitude`` /
``confidence`` from a ratio-to-threshold input.
``threshold_ratio`` is the largest ratio of any single evidence
value to its documented threshold (always ``>= 1`` whenever the
signal fires). Centralising the magnitude / confidence
derivation here means each evaluator just computes the worst
ratio and hands it off, instead of duplicating the lookup table.
"""
magnitude = magnitude_for(threshold_ratio)
confidence = confidence_for(specificity, magnitude)
return Signal(
name=name,
specificity=specificity,
magnitude=magnitude,
confidence=confidence,
summary=summary,
detail=detail,
evidence=dict(evidence),
suggestions=list(suggestions),
artifacts=dict(artifacts or {}),
offending_step=offending_step,
)
def _just_crossed_below(
summaries: list["StepSummary"], attr: str, threshold: float
) -> bool:
"""``True`` iff the most-recent summary has ``attr < threshold``
AND the previous summary did not (or there is no previous summary).
Used to fire a signal exactly once on the *first* crossing so the
report does not list the same signal at every step it remains
tripped.
"""
if not summaries:
return False # pragma: no cover
cur = float(getattr(summaries[-1], attr))
if not (cur < threshold):
return False
if len(summaries) == 1:
return True # pragma: no cover
prev = float(getattr(summaries[-2], attr))
return not (prev < threshold)
def _streak_just_reached(
summaries: list["StepSummary"],
predicate: Callable[["StepSummary"], bool],
n: int,
) -> bool:
"""``True`` iff the last ``n`` summaries all satisfy ``predicate``
AND the immediately-prior summary did not (or did not exist).
Centralises the consecutive-N pattern used by
``lbfgs_conditioning_extreme`` and any future streak signal so a
signal fires once at the moment the streak first becomes long
enough, not at every subsequent step.
"""
if len(summaries) < n:
return False
tail = summaries[-n:]
if not all(predicate(s) for s in tail):
return False
if len(summaries) == n:
return True # pragma: no cover
return not predicate(summaries[-n - 1])
# ---------------------------------------------------------------------------
# Per-step evaluators
# ---------------------------------------------------------------------------
# Threshold below which the cumulative low-water singular-value
# estimate of ``J_eq`` is treated as a LICQ violation. Sourced from
# the AGENTS.md "QP failure diagnostics" + ``min_projected_grad_norm``
# discussions: the projected-CG / CRAIG inner solvers start to suffer
# from regularisation bias once ``sigma_min(J_eq) < 1e-8``.
_RANK_DEF_THRESHOLD = 1e-8
def _eval_eq_jacobian_rank_deficient(
ctx: EvalContext,
state: "SLSQPState",
summary: "StepSummary",
summaries: list["StepSummary"],
) -> Optional[Signal]:
"""Per-step: equality Jacobian appears rank-deficient.
Cheap predicate (scalar): the cumulative low-water mark
``eq_jac_min_sv_est`` carried on
:attr:`SLSQPDiagnostics.eq_jac_min_sv_est` just dropped below
:data:`_RANK_DEF_THRESHOLD` (= ``1e-8``) at this step. Because
the diagnostics counter is a low-water mark rather than a
per-step value, the "first crossing" predicate is the right way
to fire only once per run.
Build_artifacts (only invoked if the predicate fires): pulls
``state.eq_jac`` (``m_eq * n * 8`` bytes) to host, computes
``J J^T`` (``m_eq^2 * 8`` bytes — tiny), and the singular values
of ``J_eq`` for the user to inspect.
Specificity: ``specific``. The (sigma_min < 1e-8) signature
essentially uniquely points at LICQ violation / projector
instability.
Hypothesis: the equality Jacobian is (near-)rank-deficient at the
current iterate, which makes the null-space projector
``v - A^T (A A^T)^{-1} A v`` numerically unstable. See
``AGENTS.md`` "Cholesky-based projection" and the
``CraigInnerSolver`` notes for context.
"""
# Nothing to fire when there are no equality constraints.
if int(state.eq_val.shape[0]) == 0:
return None
if not _just_crossed_below(summaries, "eq_jac_min_sv_est", _RANK_DEF_THRESHOLD):
return None
# ---- build_artifacts (live state in scope) ----
J_eq = _to_numpy(state.eq_jac)
JJT = _to_numpy(state.eq_jac @ state.eq_jac.T)
try:
sv = _to_numpy(jnp.linalg.svd(state.eq_jac, compute_uv=False))
except Exception: # pragma: no cover -- defensive
sv = np.linalg.svd(J_eq, compute_uv=False)
sigma_min = float(np.min(sv))
sigma_max = float(np.max(sv))
threshold_ratio = _RANK_DEF_THRESHOLD / max(sigma_min, 1e-300)
detail = (
"The equality Jacobian J_eq has min singular value "
f"{sigma_min:.3e} (cumulative low-water "
f"{summary.eq_jac_min_sv_est:.3e}). Below ~1e-8 the "
"null-space projector v - J_eq^T (J_eq J_eq^T)^{-1} J_eq v "
"is numerically unstable: the Cholesky regularisation "
"epsilon * I starts to bias the projection, and CRAIG / "
"MINRES-QLP can stall. This pattern is essentially "
"diagnostic for a LICQ violation, but it can also surface "
"when the equality constraints are well-posed analytically "
"yet badly scaled."
)
return _build_signal(
name="eq_jacobian_rank_deficient",
specificity="specific",
summary=(
f"J_eq looks suspicious because its smallest singular "
f"value ({sigma_min:.3e}) is below 1e-8 — the null-space "
"projector is numerically unstable here."
),
detail=detail,
evidence={
"sigma_min": sigma_min,
"sigma_max": sigma_max,
"threshold": _RANK_DEF_THRESHOLD,
"step": summary.step_count,
},
threshold_ratio=threshold_ratio,
suggestions=[
"Check that the equality constraints are linearly "
"independent at the iterate (LICQ).",
"If the constraints are well-posed but badly scaled, "
"rescale them so each row of J_eq has comparable norm.",
"Try MinresQLPSolver (handles indefinite/singular saddle-"
"point systems natively) or CraigInnerSolver (no "
"epsilon * I regularisation in the projector).",
],
artifacts={
"J_eq": J_eq,
"JJT": JJT,
"singular_values": sv,
},
offending_step=int(summary.step_count),
)
_LBFGS_KAPPA_THRESHOLD = 1e6
_LBFGS_KAPPA_STREAK = 3
# Single-step "burst" trigger: catches catastrophic one-shot blow-ups
# that the streak gate misses (the L-BFGS reset chain typically clamps
# back to identity within one or two steps after the spike, so the
# streak never reaches its 3-step minimum). Two orders of magnitude
# above ``_LBFGS_KAPPA_THRESHOLD`` so a routine "above-but-recovering"
# kappa does not also fire the burst clause.
_LBFGS_KAPPA_BURST = 1e8
def _eval_lbfgs_conditioning_extreme(
ctx: EvalContext,
state: "SLSQPState",
summary: "StepSummary",
summaries: list["StepSummary"],
) -> Optional[Signal]:
"""Per-step: L-BFGS B0 diagonal condition number is extreme.
Cheap predicate (scalar): either of two clauses fires.
1. **Streak**: the L-BFGS diagonal condition-number proxy
``max_diag / min_diag`` (carried on
:attr:`StepSummary.diag_kappa`) has been above
:data:`_LBFGS_KAPPA_THRESHOLD` (= ``1e6``) for the last
:data:`_LBFGS_KAPPA_STREAK` (= ``3``) consecutive steps.
Three is the ``soft reset`` patience the L-BFGS reset chain
uses; once the streak reaches that length the chain triggers a
soft reset, so it is also the right point to flag the user.
2. **Burst**: the *current* step's ``diag_kappa`` is above
:data:`_LBFGS_KAPPA_BURST` (= ``1e8``), which catches
catastrophic one-shot blow-ups (e.g. ``kappa = 1e+09`` for a
single step before the reset chain clamps back to identity).
The streak gate cannot catch these because the post-spike
identity reset breaks the streak after one step.
Build_artifacts (only invoked if the predicate fires): pulls the
full L-BFGS diagonal (``n * 8`` bytes — fits in L1 even for very
large ``n``) plus the scalar gamma / history count.
Specificity: ``ambiguous``. Extreme L-BFGS conditioning is
consistent with several causes (stale curvature pairs, post-
identity-reset skip lock, bad initial scaling), so the report
cannot pin it on one without more evidence.
"""
burst_fired = float(summary.diag_kappa) > _LBFGS_KAPPA_BURST
streak_fired = _streak_just_reached(
summaries,
lambda s: s.diag_kappa > _LBFGS_KAPPA_THRESHOLD,
_LBFGS_KAPPA_STREAK,
)
if not (streak_fired or burst_fired):
return None
mode = "burst" if burst_fired else "streak"
diagonal = _to_numpy(state.lbfgs_history.diagonal)
gamma = float(state.lbfgs_history.gamma)
history_count = int(state.lbfgs_history.count)
if burst_fired:
threshold_ratio = float(summary.diag_kappa) / _LBFGS_KAPPA_BURST
clause_summary = (
f"a single-step burst above {_LBFGS_KAPPA_BURST:.0e} "
"(catastrophic one-shot blow-up)"
)
else:
threshold_ratio = float(summary.diag_kappa) / _LBFGS_KAPPA_THRESHOLD
clause_summary = (
f"a streak of {_LBFGS_KAPPA_STREAK} consecutive iterations "
f"above {_LBFGS_KAPPA_THRESHOLD:.0e}"
)
detail = (
f"L-BFGS B0 diagonal condition number kappa(B0) = "
f"{summary.diag_kappa:.3e} (min={summary.min_diag:.3e}, "
f"max={summary.max_diag:.3e}) tripped the "
f"{mode!r} clause: {clause_summary}. "
"Extreme conditioning is consistent with several causes: "
"(i) stale curvature pairs whose s/y vectors are no longer "
"informative at the current iterate, (ii) the post-identity-"
"reset skip lock the AGENTS.md describes (lowering "
"``lbfgs_skip_floor`` from 1e-8 to 1e-12 was the fix), "
"(iii) badly-scaled variables, and (iv) a fallback projected-"
"gradient direction (after a QP-budget exhaustion) being "
"appended to the L-BFGS history as if it were a Newton "
"secant. Each cause has a different fix; check "
"``state.lbfgs_history.count`` and the ``n_lbfgs_skips`` "
"counter to disambiguate."
)
return _build_signal(
name="lbfgs_conditioning_extreme",
specificity="ambiguous",
summary=(
f"The L-BFGS B0 diagonal looks suspicious because its "
f"condition number is {summary.diag_kappa:.3e} ({mode} "
f"clause: {clause_summary})."
),
detail=detail,
evidence={
"diag_kappa": summary.diag_kappa,
"min_diag": summary.min_diag,
"max_diag": summary.max_diag,
"threshold": _LBFGS_KAPPA_THRESHOLD,
"burst_threshold": _LBFGS_KAPPA_BURST,
"streak": _LBFGS_KAPPA_STREAK,
# ``burst_clause = 1`` ⇔ the single-step burst fired,
# ``burst_clause = 0`` ⇔ the streak fired. Encoded as int
# because :attr:`Signal.evidence` is typed numeric-only.
"burst_clause": 1 if burst_fired else 0,
"step": summary.step_count,
},
threshold_ratio=threshold_ratio,
suggestions=[
"Try ``use_exact_hvp_in_qp=True`` so the QP no longer "
"depends on the L-BFGS approximation.",
"If many curvature pairs are being skipped "
"(``n_lbfgs_skips`` is large), inspect the secant pairs "
"for noise.",
"Consider rescaling the variables so coordinate-wise "
"curvatures are within a few orders of magnitude.",
],
artifacts={
"diagonal": diagonal,
"gamma": np.asarray(gamma),
"history_count": np.asarray(history_count),
},
offending_step=int(summary.step_count),
)
# ---------------------------------------------------------------------------
# End-of-run evaluators
# ---------------------------------------------------------------------------
def _eval_multiplier_recovery_noise(
ctx: EvalContext,
final_state: "SLSQPState",
summaries: list["StepSummary"],
coarse_result: Any,
) -> Optional[Signal]:
"""End-of-run: noise-floor stationarity stall pattern.
The textbook signature documented in ``AGENTS.md``:
* ``min_projected_grad_norm / max(mu_max, 1) < rtol`` (the inner
solver's noise-aware stationarity proxy already passed)
* ``|grad_L| / max(mu_max, 1) > rtol`` (the classical
stationarity proxy did not)
* ``n_steps_inexact_below_classical / step_count > 0.5`` (more
than half the iterations had a strictly-cleaner projected
gradient than classical, i.e. multiplier-recovery noise was
the limiting factor across the bulk of the run)
All three together are essentially diagnostic for the noise-floor
stationarity stall mode that ``HRInexactSTCG`` +
``use_inexact_stationarity=True`` is designed to rescue.
Specificity: ``specific``.
"""
if not summaries:
return None # pragma: no cover
diag = final_state.diagnostics
last = summaries[-1]
kkt_scale = max(last.kkt_scale, 1.0)
rtol = ctx.rtol
classical_ratio = last.grad_lagrangian_norm / kkt_scale
proj_ratio = float(diag.min_projected_grad_norm) / kkt_scale
n_inexact_below = int(diag.n_steps_inexact_below_classical)
fraction_below = n_inexact_below / max(last.step_count, 1)
if not (proj_ratio < rtol):
return None
if not (classical_ratio > rtol):
return None # pragma: no cover
if not (fraction_below > 0.5):
return None # pragma: no cover
# Magnitude reflects how clean the projected ratio is relative to
# rtol versus how stuck the classical ratio is. We use the
# ratio of classical-to-projected; the further apart they are,
# the more obvious the noise-floor diagnosis.
threshold_ratio = classical_ratio / max(proj_ratio, 1e-300)
detail = (
"The inner solver's projected-gradient norm low-water mark "
f"satisfies ||W g|| / max(mu_max, 1) = {proj_ratio:.3e} < rtol "
f"({rtol:.1e}), while the classical Lagrangian gradient "
f"ratio ||grad_L|| / max(mu_max, 1) = {classical_ratio:.3e} is still "
"above rtol. This is the textbook signature of multiplier-"
"recovery noise contaminating the classical stationarity "
'test (see AGENTS.md "Inexact stationarity disjunct"). '
f"More than half the iterations ({n_inexact_below} / "
f"{last.step_count}) had ||W g|| < ||grad_L||, confirming "
"the pattern is persistent rather than a single-step "
"artefact. HRInexactSTCG paired with "
"``use_inexact_stationarity=True`` directly rescues this "
"mode."
)
return _build_signal(
name="multiplier_recovery_noise",
specificity="specific",
summary=(
f"The classical stationarity test ({classical_ratio:.3e}) "
f"looks suspicious because the inner-solver projected-"
f"gradient ratio ({proj_ratio:.3e}) already passed rtol "
f"({rtol:.1e}) but the recovered Lagrangian gradient is "
"noise-dominated."
),
detail=detail,
evidence={
"classical_ratio": classical_ratio,
"projected_ratio": proj_ratio,
"rtol": rtol,
"n_steps_inexact_below_classical": n_inexact_below,
"fraction_below": fraction_below,
},
threshold_ratio=threshold_ratio,
suggestions=[
"Switch the inner solver to HRInexactSTCG (e.g. "
"``inner_solver=HRInexactSTCG(inner=ProjectedCGCholesky())``).",
"Set ``use_inexact_stationarity=True`` so the convergence "
"test admits the projected-gradient disjunct.",
],
artifacts={},
offending_step=None,
)
# Hard floor used as a fallback when the solver's LS budget cannot be
# read off ``EvalContext`` (e.g. synthetic tests that use a stand-in
# solver). ``2 ** -20 ~= 9.5e-7`` is the SciPy/SLSQP default; multiply
# by 10 so a single backtracking step above the floor still triggers.
_LS_COLLAPSE_FALLBACK_FLOOR = 10.0 * (2.0**-20)
def _ls_floor_for(ctx: EvalContext) -> float:
"""Return ``10 * 2**-line_search.max_steps`` for the active solver.
The backtracking line search halves ``alpha`` at most
``line_search.max_steps`` times before giving up, so the smallest
accepted ``alpha`` on success is ``2**-max_steps``. We multiply
by 10 so an LS that bottomed out within one backtracking step of
the floor still trips the predicate (matches the ``magnitude_for``
bucketing of "marginal" = within 10x).
Falls back to :data:`_LS_COLLAPSE_FALLBACK_FLOOR` (the SciPy default
``max_steps == 20``) when the solver instance does not expose the
``line_search_max_steps`` accessor — this lets the synthetic test
suite use a minimal stand-in solver without pulling in the full
:class:`SLSQP` class.
"""
solver = ctx.solver
max_steps = getattr(solver, "line_search_max_steps", None)
if not isinstance(max_steps, int) or max_steps <= 0:
return _LS_COLLAPSE_FALLBACK_FLOOR
return 10.0 * (2.0**-max_steps)
def _eval_line_search_collapse(
ctx: EvalContext,
final_state: "SLSQPState",
summaries: list["StepSummary"],
coarse_result: Any,
) -> Optional[Signal]:
"""End-of-run: line search collapsed to a tiny step at termination.
Cheap predicate: the cumulative ``tail_ls_failures`` counter is
> 0 (the line search failed at the last iteration) and the
minimum line-search alpha across the run was at or below the
LS floor ``10 * 2**-line_search.max_steps`` (i.e. within one
backtracking step of the smallest ``alpha`` the line search will
ever accept). See :func:`_ls_floor_for` for the derivation —
the previous hard-coded ``1e-10`` literal could never fire on
the default ``LineSearchConfig.max_steps == 20`` solver because
the LS floor itself is ``2**-20 ~= 9.5e-7``.
Specificity: ``ambiguous`` — a stuck LS is consistent with a bad
L-BFGS direction, a too-small merit penalty, or a non-descent
QP step.
Artifacts: the ``alpha`` trajectory near the offending step plus
the final L-BFGS diagonal. The exact L-BFGS diagonal at the
offending step is not preserved (that would require checkpointing
every step); use :func:`capture_state_at_step` if you need it.
"""
if not summaries:
return None # pragma: no cover
diag = final_state.diagnostics
tail_failures = int(diag.tail_ls_failures)
alpha_min = float(diag.ls_alpha_min)
ls_floor = _ls_floor_for(ctx)
if not (tail_failures > 0 and alpha_min <= ls_floor):
return None
# Locate the offending step (first time alpha drops to the LS
# floor while the LS was reporting failure).
offending_step = None
for s in summaries:
if (not s.ls_success) and s.last_alpha <= ls_floor:
offending_step = int(s.step_count)
break
if offending_step is None:
offending_step = int(summaries[-1].step_count) # pragma: no cover
# Trajectory window around the offending step (5 before + 5 after,
# clipped) for the artifact.
idx = max(0, offending_step - 1)
window_lo = max(0, idx - 5)
window_hi = min(len(summaries), idx + 6)
alpha_window = np.asarray(
[s.last_alpha for s in summaries[window_lo:window_hi]],
dtype=float,
)
final_diagonal = _to_numpy(final_state.lbfgs_history.diagonal)
threshold_ratio = ls_floor / max(alpha_min, 1e-300)
detail = (
f"The line search reached its smallest accepted alpha "
f"({alpha_min:.3e}) at or below the LS floor "
f"({ls_floor:.3e} = 10 * 2**-max_ls_steps) and exited with "
f"{tail_failures} tail line-search failures. This is "
"consistent with (i) a non-descent direction the QP returned "
"(L-BFGS conditioning, stale curvature), (ii) a too-small "
"merit penalty rho, or (iii) a step that violates the "
"Armijo condition by overshooting active constraints. The "
"alpha-trajectory window around the offending step is "
"attached as an artifact. The L-BFGS diagonal at the "
"*offending* step is not preserved by the diagnostics layer; "
"call ``capture_state_at_step(..., step="
f"{offending_step})`` to recover it exactly."
)
return _build_signal(
name="line_search_collapse",
specificity="ambiguous",
summary=(
f"The line search looks suspicious because alpha "
f"collapsed to {alpha_min:.3e} (<= LS floor "
f"{ls_floor:.3e}) and the run exited with "
f"{tail_failures} tail line-search failures."
),
detail=detail,
evidence={
"ls_alpha_min": alpha_min,
"tail_ls_failures": tail_failures,
"ls_floor": ls_floor,
},
threshold_ratio=threshold_ratio,
suggestions=[
"Inspect the L-BFGS diagonal / gamma at the offending "
"step (use ``capture_state_at_step``). If kappa(B0) is "
"extreme, try ``use_exact_hvp_in_qp=True``.",
"Try a larger initial merit penalty (raise the QP "
"multipliers' bound or set a larger ``proximal_mu_max``).",
"Try MinresQLPSolver: it solves the saddle-point KKT "
"directly and is less sensitive to L-BFGS corruption.",
],
artifacts={
"alpha_window": alpha_window,
"lbfgs_diagonal_final": final_diagonal,
},
offending_step=offending_step,
)
def _eval_qp_budget_or_pingpong(
ctx: EvalContext,
final_state: "SLSQPState",
summaries: list["StepSummary"],
coarse_result: Any,
) -> Optional[Signal]:
"""End-of-run: QP active-set loop ran out of budget or ping-ponged.
Specificity: ``generic``. Both events have many possible causes
(LPEC-A over-prediction, degenerate vertex, coupled multipliers
from the inner solver, etc).
"""
if not summaries:
return None # pragma: no cover
diag = final_state.diagnostics
n_budget = int(diag.n_qp_budget_exhausted)
n_pingpong = int(diag.n_qp_ping_pong)
if not (n_budget > 0 or n_pingpong > 0):
return None
n_steps = max(len(summaries), 1)
fraction_budget = n_budget / n_steps
fraction_pingpong = n_pingpong / n_steps
# The "threshold" for either is "1 occurrence per run" ie 1/n_steps;
# ratio is just the count itself relative to a threshold of 1.
threshold_ratio = max(n_budget, n_pingpong) / 1.0
detail = (
f"The QP active-set loop hit its iteration budget on "
f"{n_budget} steps and short-circuited on a ping-pong on "
f"{n_pingpong} steps (out of {n_steps} total iterations). "
"Both events are usually downstream of one of: an LPEC-A "
"over-prediction the trust gate failed to catch (raise "
"``lpeca_trust_threshold`` or use "
"``active_set_method='expand'``), a degenerate vertex with "
"coupled constraints (raise ``mult_drop_floor``), or "
"noise-contaminated multipliers from the inner solver "
"(switch to MinresQLPSolver or HRInexactSTCG)."
)
return _build_signal(
name="qp_budget_or_pingpong",
specificity="generic",
summary=(
f"The QP layer looks suspicious because it exhausted its "
f"budget on {n_budget} steps and ping-ponged on "
f"{n_pingpong} steps."
),
detail=detail,
evidence={
"n_qp_budget_exhausted": n_budget,
"n_qp_ping_pong": n_pingpong,
"fraction_budget": fraction_budget,
"fraction_pingpong": fraction_pingpong,
"n_steps": n_steps,
},
threshold_ratio=threshold_ratio,
suggestions=[
"Raise ``qp_max_iter`` to give the active-set loop more "
"headroom (current default may be too tight for "
"ill-conditioned problems).",
"If LPEC-A is enabled, raise ``lpeca_trust_threshold`` "
"or fall back to ``active_set_method='expand'``.",
"Raise ``mult_drop_floor`` so noise-flipped multipliers "
"do not spuriously drop active constraints.",
],
artifacts={},
offending_step=None,
)
def _eval_merit_oscillation(
ctx: EvalContext,
final_state: "SLSQPState",
summaries: list["StepSummary"],
coarse_result: Any,
) -> Optional[Signal]:
"""End-of-run: merit function regressed too often.
Cheap predicate: ``n_merit_regressions > step_count / 10`` (more
than 10 % of steps saw merit increase despite line-search
success). Specificity: ``generic`` — merit oscillation can be a
too-small ``rho``, a stale Hessian approximation, or both.
"""
if not summaries:
return None # pragma: no cover
diag = final_state.diagnostics
n_regressions = int(diag.n_merit_regressions)
n_steps = max(len(summaries), 1)
threshold = max(n_steps // 10, 1)
if not (n_regressions > threshold):
return None
threshold_ratio = n_regressions / threshold
detail = (
f"The L1 merit function increased on {n_regressions} of "
f"{n_steps} iterations despite the line search reporting "
"success. The Han-Powell merit function is monotone-"
"decreasing under a correctly-sized ``rho``; persistent "
"regressions point at ``rho`` being too small (the penalty "
"does not dominate the constraint violation) or at a stale "
"L-BFGS approximation that produces non-descent QP steps."
)
return _build_signal(
name="merit_oscillation",
specificity="generic",
summary=(
f"The merit function looks suspicious because it "
f"regressed on {n_regressions} of {n_steps} iterations "
f"(threshold = step_count / 10 = {threshold})."
),
detail=detail,
evidence={
"n_merit_regressions": n_regressions,
"n_steps": n_steps,
"threshold": threshold,
},
threshold_ratio=threshold_ratio,
suggestions=[
"Inspect the merit penalty ``rho`` trajectory; if it "
"stays at the initial value, the penalty update is "
"starved of evidence.",
"Try ``use_exact_hvp_in_qp=True`` so the QP step "
"reflects the actual curvature of the Lagrangian.",
],
artifacts={},
offending_step=None,
)
def _eval_lpeca_overpredicting(
ctx: EvalContext,
final_state: "SLSQPState",
summaries: list["StepSummary"],
coarse_result: Any,
) -> Optional[Signal]:
"""End-of-run: LPEC-A over-predicted on > 50 % of steps OR was
bypassed every step.
Specificity: ``ambiguous`` — over-prediction can mean LPEC-A's
rho_bar is too pessimistic (rare) or that the iterate is far
from KKT (the warm-up gate should already cover this), so the
signal is informative but not pinpoint.
"""
if not summaries:
return None # pragma: no cover
diag = final_state.diagnostics
n_capped = int(diag.n_lpeca_capped)
n_bypassed = int(diag.n_lpeca_bypassed)
n_steps = max(len(summaries), 1)
fraction_capped = n_capped / n_steps
bypassed_all = n_bypassed == n_steps and n_steps > 0
if not (fraction_capped > 0.5 or bypassed_all):
return None
if bypassed_all:
threshold_ratio = float(n_bypassed) / 1.0 # pragma: no cover
else:
threshold_ratio = fraction_capped / 0.5
detail = (
f"LPEC-A's rank-aware size cap truncated the prediction on "
f"{n_capped} of {n_steps} iterations and the prediction was "
f"bypassed on {n_bypassed} iterations. Persistent capping "
"means rho_bar (the LPEC-A proximity measure) is letting "
"too many constraints into the predicted active set, which "
"in turn forces the cap to discard them. Persistent "
"bypassing means the trust gate vetoed the prediction every "
"step, in which case LPEC-A is contributing nothing. Both "
"imply the EXPAND fallback is doing all the work."
)
return _build_signal(
name="lpeca_overpredicting",
specificity="ambiguous",
summary=(
f"LPEC-A looks suspicious because it was capped on "
f"{n_capped} steps and bypassed on {n_bypassed} steps "
f"(out of {n_steps})."
),
detail=detail,
evidence={
"n_lpeca_capped": n_capped,
"n_lpeca_bypassed": n_bypassed,
"n_steps": n_steps,
"fraction_capped": fraction_capped,
},
threshold_ratio=threshold_ratio,
suggestions=[
"Disable LPEC-A by setting "
"``active_set_method='expand'`` and re-run; if "
"convergence improves, LPEC-A was hurting more than "
"helping on this problem.",
"Raise ``lpeca_trust_threshold`` so the predictor is "
"trusted more often (only safe when rho_bar is a "
"reliable proxy near the optimum).",
"Raise ``lpeca_warmup_steps`` so LPEC-A waits longer before contributing.",
],
artifacts={},
offending_step=None,
)
def _eval_infeasible_termination(
ctx: EvalContext,
final_state: "SLSQPState",
summaries: list["StepSummary"],
coarse_result: Any,
) -> Optional[Signal]:
"""End-of-run: the solver terminated at an infeasible iterate.
Fires when either (a) the granular ``state.termination_code`` is
one of {``infeasible``, ``nonfinite``}, or (b) primal feasibility
was never satisfied across the run (max constraint violation
above ``atol`` on every recorded summary). Specificity:
``specific``.
Artifacts: indices and signed values of the worst-violated
equality + inequality constraints at the final iterate.
"""
if not summaries:
return None # pragma: no cover
code = final_state.termination_code
name = _result_name_safe(code)
code_match = name in {"infeasible", "nonfinite"}
last = summaries[-1]
atol = ctx.atol
feasibility_ever_satisfied = any(
s.max_eq_violation <= atol and s.max_ineq_violation <= atol for s in summaries
)
persistent_infeasibility = not feasibility_ever_satisfied and (
last.max_eq_violation > atol or last.max_ineq_violation > atol
)
if not (code_match or persistent_infeasibility):
return None
eq_val = _to_numpy(final_state.eq_val)
ineq_val = _to_numpy(final_state.ineq_val)
# Worst-violated indices (top-5).
eq_worst_idx = (
np.argsort(np.abs(eq_val))[::-1][:5]
if eq_val.size > 0
else np.array([], dtype=int)
)
ineq_violations = np.maximum(0.0, -ineq_val) if ineq_val.size > 0 else np.array([])
ineq_worst_idx = (
np.argsort(ineq_violations)[::-1][:5]
if ineq_violations.size > 0
else np.array([], dtype=int)
)
worst_violation = max(last.max_eq_violation, last.max_ineq_violation)
threshold_ratio = worst_violation / max(atol, 1e-300)
detail = (
f"The run terminated at a primally infeasible iterate "
f"(termination code = {name!r}, max|c_eq| = "
f"{last.max_eq_violation:.3e}, max(0, -c_ineq) = "
f"{last.max_ineq_violation:.3e}, atol = {atol:.1e}). "
"Either the constraints are mutually infeasible (no point "
"satisfies all of them simultaneously) or the active-set "
"machinery could not satisfy them within ``atol`` from the "
"supplied initial point. The worst-violated constraint "
"indices and signed values are attached as artifacts."
)
return _build_signal(
name="infeasible_termination",
specificity="specific",
summary=(
f"Primal feasibility looks suspicious because the run "
f"ended with max|c_eq|={last.max_eq_violation:.3e} and "
f"max(0,-c_ineq)={last.max_ineq_violation:.3e} "
f"(atol={atol:.1e})."
),
detail=detail,
evidence={
"max_eq_violation": last.max_eq_violation,
"max_ineq_violation": last.max_ineq_violation,
"atol": atol,
},
threshold_ratio=threshold_ratio,
suggestions=[
"Print the worst-violated constraints (see artifacts) "
"and verify they admit a common feasible point.",
"Provide a feasible initial iterate, or relax the "
"constraints incrementally to find a feasibility "
"boundary.",
"If ``c_eq`` involves a divide-by-near-zero or other "
"non-Lipschitz expression, consider rewriting the "
"constraint to avoid the singularity.",
],
artifacts={
"eq_values": eq_val,
"ineq_values": ineq_val,
"eq_worst_indices": np.asarray(eq_worst_idx, dtype=int),
"ineq_worst_indices": np.asarray(ineq_worst_idx, dtype=int),
},
offending_step=None,
)
# Default ``divergence_patience`` from :class:`SLSQPConfig` (3). The
# magnitude of :func:`_eval_divergence_rollback_triggered` is
# normalised by this value so a run that triggered exactly one
# rollback fires at "marginal" magnitude and a run that triggered the
# rollback ten times fires at "moderate" / "extreme".
_DIVERGENCE_PATIENCE_DEFAULT = 3
def _eval_divergence_rollback_triggered(
ctx: EvalContext,
final_state: "SLSQPState",
summaries: list["StepSummary"],
coarse_result: Any,
) -> Optional[Signal]:
"""End-of-run: best-iterate divergence rollback fired during the run.
Cheap predicate: ``final_state.diagnostics.divergence_triggered``
is ``True`` (the rollback latched at least once) OR
``n_divergence_blowups > 0`` (a blow-up was observed even if
patience was not yet reached). The rollback is the
"best-iterate-rescue" guardrail described in ``AGENTS.md``
"Best-iterate divergence rollback"; if it fires, the run hit a
catastrophic merit blow-up and the iterate the driver returned is
``state.best_x``, not the attempted final iterate.
Specificity: ``specific``. This datum is essentially diagnostic
for "the run blew up and was rescued by rollback" — no other
failure mode flips ``divergence_triggered``.
"""
if not summaries:
return None # pragma: no cover
diag = final_state.diagnostics
triggered = bool(diag.divergence_triggered)
n_blowups = int(diag.n_divergence_blowups)
if not (triggered or n_blowups > 0):
return None
last = summaries[-1]
# Locate the offending step: first ``StepSummary`` recording a
# blow-up event (``blowup_count`` > 0 immediately after a step
# whose ``blowup_count`` was 0, or simply the first step with
# ``diverging`` set). Falls back to the final summary when no
# such step is found.
offending_step: Optional[int] = None
prev_blowup = 0
for s in summaries:
if s.diverging:
offending_step = int(s.step_count)
break
if s.blowup_count > prev_blowup:
offending_step = int(s.step_count)
break
prev_blowup = int(s.blowup_count)
if offending_step is None:
offending_step = int(last.step_count) # pragma: no cover
threshold_ratio = max(float(n_blowups), 1.0) / float(_DIVERGENCE_PATIENCE_DEFAULT)
detail = (
f"The best-iterate divergence rollback latched (triggered = "
f"{triggered}, n_divergence_blowups = {n_blowups}). This "
"means the merit function suffered a catastrophic blow-up "
"(``merit_new - best_merit > divergence_factor * "
"max(|best_merit|, 1)`` for ``divergence_patience`` "
"consecutive steps; defaults are ``10.0`` and ``3``) and the "
"iterate the driver returned was overwritten with "
"``state.best_x``. The L-BFGS history, multipliers and "
"gradients still reflect the *attempted* step, so the "
"report's 'Final iterate metrics' section may show post-"
"blow-up values that do not correspond to the actually-"
"returned iterate. See ``AGENTS.md`` 'Best-iterate "
"divergence rollback'. The first blow-up was at step "
f"{offending_step}; inspect the trajectory around there for "
"the failure mechanism (typically L-BFGS poisoning + "
"merit-penalty over-correction)."
)
return _build_signal(
name="divergence_rollback_triggered",
specificity="specific",
summary=(
"The best-iterate divergence rollback looks suspicious "
f"because it latched (triggered={triggered}) after "
f"{n_blowups} blow-up event(s); the returned iterate is "
"``best_x``, not the attempted final iterate."
),
detail=detail,
evidence={
"divergence_triggered": 1 if triggered else 0,
"n_divergence_blowups": n_blowups,
"patience_default": _DIVERGENCE_PATIENCE_DEFAULT,
"offending_step": offending_step,
},
threshold_ratio=threshold_ratio,
suggestions=[
"Inspect the trajectory around the offending step "
"(use ``capture_state_at_step``) for an L-BFGS diagonal "
"blow-up paired with a merit-penalty jump — the usual "
"cascade is QP-budget exhaustion → noisy curvature pair "
"appended → kappa(B0) explosion → unrecoverable QP "
"direction → merit blow-up → rollback.",
"Try ``use_exact_hvp_in_qp=True`` so the QP no longer "
"depends on the L-BFGS approximation (the most common "
"way to break the cascade).",
"Lower ``divergence_patience`` so the rollback fires "
"earlier, before the post-blow-up state corrupts other "
"quantities the report surfaces.",
],
artifacts={},
offending_step=offending_step,
)
# Total-trajectory ratio threshold: ``rho`` grew by 6+ orders of
# magnitude across the run. The Han-Powell penalty update is meant
# to grow ``rho`` monotonically by small factors per step; a 1e6
# total span is a strong indicator of an over-correction cycle.
_RHO_EXPLOSION_TOTAL_RATIO = 1e6
# Per-step ratio threshold: ``rho`` jumped by 1000x or more in a
# single step. Either threshold tripping is enough.
_RHO_EXPLOSION_STEP_RATIO = 1e3
def _eval_merit_penalty_explosion(
ctx: EvalContext,
final_state: "SLSQPState",
summaries: list["StepSummary"],
coarse_result: Any,
) -> Optional[Signal]:
"""End-of-run: merit penalty ``rho`` exploded across the trajectory.
Cheap predicate: either
``max(rho) / max(min(rho), 1) > _RHO_EXPLOSION_TOTAL_RATIO``
(the trajectory-wide span exceeds 1e6) OR
``max(rho[i] / max(rho[i-1], 1)) > _RHO_EXPLOSION_STEP_RATIO``
(a single-step jump exceeds 1e3). Either pattern signals that
the Han-Powell penalty update over-corrected, usually because
the QP direction was poisoned by a degenerate inner solve and
the merit function's predicted reduction stopped agreeing with
the realised one.
Specificity: ``specific``. ``rho`` is monotone-non-decreasing
by construction, and the update cadence is bounded by problem
geometry; a 6-orders-of-magnitude span across the run or a
1000x jump in one step has a narrow set of causes (L-BFGS
poisoning + merit catch-up is the dominant one).
"""
if not summaries:
return None # pragma: no cover
rhos = [max(float(s.merit_penalty), 1.0) for s in summaries]
min_rho = min(rhos)
max_rho = max(rhos)
ratio_total = max_rho / min_rho
if len(rhos) > 1:
per_step_ratios = [rhos[i] / rhos[i - 1] for i in range(1, len(rhos))]
ratio_step = max(per_step_ratios)
# Step where the largest single-step jump happened (1-indexed
# to match ``StepSummary.step_count`` semantics).
jump_idx_zero = max(range(1, len(rhos)), key=lambda i: rhos[i] / rhos[i - 1])
jump_step = int(summaries[jump_idx_zero].step_count)
else:
ratio_step = 1.0
jump_step = int(summaries[-1].step_count)
total_fired = ratio_total > _RHO_EXPLOSION_TOTAL_RATIO
step_fired = ratio_step > _RHO_EXPLOSION_STEP_RATIO
if not (total_fired or step_fired):
return None
threshold_ratio = max(
ratio_total / _RHO_EXPLOSION_TOTAL_RATIO,
ratio_step / _RHO_EXPLOSION_STEP_RATIO,
)
detail = (
"The merit penalty ``rho`` trajectory exploded across the "
f"run: span ratio ``max(rho) / max(min(rho), 1) = "
f"{ratio_total:.3e}`` (threshold "
f"{_RHO_EXPLOSION_TOTAL_RATIO:.0e}), largest single-step "
f"jump ``{ratio_step:.3e}`` (threshold "
f"{_RHO_EXPLOSION_STEP_RATIO:.0e}, at step {jump_step}). "
"The Han-Powell update grows ``rho`` monotonically by "
"small factors per step; a span this large is the "
"merit function trying to catch up after a QP step poisoned "
"the predicted-vs-realised merit reduction (usually because "
"an L-BFGS pair from a fallback projected-gradient direction "
"was appended to the curvature buffer and corrupted the "
"subsequent Newton direction). Once ``rho`` is over-sized, "
"every subsequent line search has to absorb a much larger "
"merit derivative and typically collapses to the LS floor."
)
return _build_signal(
name="merit_penalty_explosion",
specificity="specific",
summary=(
"The merit penalty ``rho`` looks suspicious because it "
f"spanned {ratio_total:.3e}x across the run "
f"(min={min_rho:.3e}, max={max_rho:.3e}; largest "
f"single-step jump {ratio_step:.3e}x at step {jump_step})."
),
detail=detail,
evidence={
"rho_min": min_rho,
"rho_max": max_rho,
"rho_ratio_total": ratio_total,
"rho_ratio_step": ratio_step,
"rho_threshold_total": _RHO_EXPLOSION_TOTAL_RATIO,
"rho_threshold_step": _RHO_EXPLOSION_STEP_RATIO,
"jump_step": jump_step,
},
threshold_ratio=threshold_ratio,
suggestions=[
"Try ``use_exact_hvp_in_qp=True`` so the QP no longer "
"depends on the L-BFGS approximation; the merit-penalty "
"over-correction is almost always downstream of an "
"L-BFGS poisoning event.",
"Raise ``qp_max_iter`` so QP-budget exhaustion (which "
"forces the projected-gradient fallback that poisons the "
"L-BFGS history) is less likely.",
"Cap the merit penalty growth by passing a smaller "
"``proximal_mu_max`` if the equality constraints are "
"well-scaled; the ceiling prevents runaway ``rho``.",
"If the run was launched with ``auto_scale=False`` (or via "
"a path that bypasses :func:`slsqp_jax.minimize_like_scipy`), "
"enable auto-scaling -- a runaway ``rho`` is almost always "
"the merit function trying to reconcile mismatched objective "
"and constraint magnitudes. The Auto-scaling section of "
"the report (when present) lists the factors that were "
"applied.",
"The new ``auto_scale=True`` (uniform mode, default) uses a "
"single shared scalar across all constraint rows -- it "
"prevents the cascade without flattening the relative row "
"magnitudes. If your constraints have intentionally "
"heterogeneous magnitudes (e.g. a top-level budget plus "
"smaller sub-budgets) this is the right mode. If a single "
"constraint row has *vastly* different gradient magnitude "
"from the rest, ``auto_scale='balanced'`` (per-row, the "
"old default) flattens those rows individually and may "
"rescue the run. ``auto_scale='aggressive'`` raises the "
"amplification ceiling for very small-gradient problems.",
],
artifacts={
"rho_trajectory": np.asarray(rhos, dtype=float),
},
offending_step=jump_step if step_fired else None,
)
# ``rho`` is treated as "frozen" for the prefix when its span is
# below this multiplicative tolerance. ``rho`` is mutated by an
# additive-then-clip update in the solver, so a true "did not move"
# signal needs a tiny epsilon rather than equality testing.
_RHO_FROZEN_TOL = 1.0 + 1e-9
# Minimum violation growth that constitutes "drift": the worst
# infeasibility at the end of the prefix must be at least this
# multiple of the worst infeasibility at the start.
_STARVATION_VIOLATION_GROWTH = 5.0
# The frozen-prefix must span at least this many steps (and at
# least 5 absolute) to fire. Short prefixes are noise.
_STARVATION_PREFIX_FRACTION = 1.0 / 3.0
def _eval_penalty_starvation(
ctx: EvalContext,
final_state: "SLSQPState",
summaries: list["StepSummary"],
coarse_result: Any,
) -> Optional[Signal]:
"""End-of-run: ``rho`` stayed at its initial value while feasibility drifted.
Cheap predicate: the longest prefix of summaries during which
``rho`` is constant (within :data:`_RHO_FROZEN_TOL`) spans at
least ``max(5, n_steps // 3)`` steps, ``max_eq_violation`` is
monotone non-decreasing across that prefix, and grew by at
least :data:`_STARVATION_VIOLATION_GROWTH` (= 5x) from the
first to the last step of the prefix.
Catches the "started feasible, drifted off, ``rho`` never grew"
pathology described in the diagnostic notes for the feasible-
start divergence run: the Han-Powell directional-derivative test
cannot trigger a ``rho`` increase when the ``f`` reduction alone
satisfies it, so feasibility can decay silently for many steps.
Specificity: ``specific``.
"""
if not summaries:
return None # pragma: no cover
n_steps = len(summaries)
min_prefix = max(5, n_steps // 3)
if n_steps < min_prefix:
return None
# Identify the longest leading prefix where ``rho`` is frozen.
rho0 = max(float(summaries[0].merit_penalty), 1.0)
prefix_end = 0
for i, s in enumerate(summaries):
rho_i = max(float(s.merit_penalty), 1.0)
ratio = max(rho_i / rho0, rho0 / rho_i)
if ratio > _RHO_FROZEN_TOL:
break
prefix_end = i
prefix_len = prefix_end + 1
if prefix_len < min_prefix:
return None
prefix = summaries[: prefix_end + 1]
eq_violations = [float(s.max_eq_violation) for s in prefix]
ineq_violations = [float(s.max_ineq_violation) for s in prefix]
# Treat eq + ineq as a unified "infeasibility" proxy so the test
# works for both equality- and inequality-only problems.
violations = [max(e, i) for e, i in zip(eq_violations, ineq_violations)]
# Monotone non-decreasing test (with a small tolerance to
# absorb numerical jitter at the floor).
monotone = all(
violations[i] >= violations[i - 1] - 1e-15 for i in range(1, len(violations))
)
if not monotone:
return None
start_violation = violations[0]
end_violation = violations[-1]
# Need a non-zero starting violation to avoid 0/0; if it is at
# the floor, require an absolute growth above ``atol`` instead.
atol = ctx.atol
if start_violation > 0 and start_violation > atol:
growth_ratio = end_violation / start_violation
else:
# Compare against ``atol`` so a near-zero start that grew to
# ``5 * atol`` qualifies (the canonical feasible-start case).
growth_ratio = end_violation / max(atol, 1e-300)
if growth_ratio < _STARVATION_VIOLATION_GROWTH:
return None
threshold_ratio = growth_ratio / _STARVATION_VIOLATION_GROWTH
detail = (
f"The merit penalty ``rho`` stayed constant at "
f"{rho0:.3e} for the first {prefix_len} of {n_steps} "
"iterations while the worst infeasibility grew "
f"monotonically from {start_violation:.3e} to "
f"{end_violation:.3e}. This is the textbook "
"'penalty-starved feasibility drift' pattern: the "
"Han-Powell directional-derivative test cannot trigger a "
"``rho`` increase when the ``f`` reduction alone satisfies "
"the predicted-vs-realised inequality, so the merit "
"function happily accepts steps that improve ``f`` at the "
"cost of feasibility. When feasibility eventually decays "
"enough to require a real ``rho`` correction, the catch-up "
"is usually huge and downstream poisons the L-BFGS history "
"(see ``merit_penalty_explosion`` and "
"``divergence_rollback_triggered`` for the typical "
"downstream signals). See ``AGENTS.md`` 'L1 merit "
"directional derivative' for the update rule."
)
return _build_signal(
name="penalty_starvation",
specificity="specific",
summary=(
"The merit penalty ``rho`` looks suspicious because it "
f"stayed constant at {rho0:.3e} for {prefix_len} of "
f"{n_steps} steps while max-infeasibility grew "
f"{growth_ratio:.1f}x (from {start_violation:.3e} to "
f"{end_violation:.3e})."
),
detail=detail,
evidence={
"rho_initial": rho0,
"frozen_prefix_steps": prefix_len,
"n_steps": n_steps,
"violation_start": start_violation,
"violation_end": end_violation,
"growth_ratio": growth_ratio,
"growth_threshold": _STARVATION_VIOLATION_GROWTH,
},
threshold_ratio=threshold_ratio,
suggestions=[
"If the supplied initial iterate is exactly feasible, "
"perturb it slightly (e.g. ``x0 + atol * sign_vector``) "
"so the merit-penalty update mechanism warms ``rho`` up "
"before feasibility drifts.",
"Bump the initial ``merit_penalty`` so the feasibility "
"term has weight from step 1.",
"Check the magnitude balance between the constraint "
"Jacobian and the objective gradient: if "
"``eq_jac_min_sv_est`` (in the diagnostics block) is "
"much larger than ``||grad_f||``, the constraint is "
"much steeper than the objective and the merit "
"function cannot reconcile their natural scales without "
"a huge ``rho``. Rescaling the constraint (e.g. drop "
"the ``1/target`` division if the constraint is "
"``(h(x) - target) / target``) so ``||J_eq||`` is "
"comparable to ``||grad_f||`` is the durable fix.",
"If LPEC-A is enabled, raise ``lpeca_warmup_steps`` so "
"the predictor does not contaminate the early-iteration "
"active set on a near-feasible iterate.",
"If the run was launched with ``auto_scale=False`` (or via "
"a path that bypasses :func:`slsqp_jax.minimize_like_scipy`), "
"enable auto-scaling -- the ``||J_eq|| >> ||grad_f||`` "
"magnitude mismatch that triggers penalty starvation is "
"exactly what gradient-based scaling is designed to fix. "
"The Auto-scaling section of the report (when present) "
"lists the factors that were applied.",
"The new ``auto_scale=True`` (uniform mode, default) "
"preserves the relative row magnitudes between constraints "
"while bringing the constraint Jacobian into the same "
"range as the objective gradient -- the right choice when "
"your constraints encode meaningful inter-row structure "
"(e.g. a top-level budget plus smaller sub-budgets). If "
"*one* constraint row has vastly different gradient "
"magnitude from the rest and that's not a meaningful "
"spread, ``auto_scale='balanced'`` (per-row, the old "
"default) flattens those rows individually and may rescue "
"the run. ``auto_scale='aggressive'`` raises the "
"amplification ceiling.",
],
artifacts={
"rho_prefix": np.asarray(
[float(s.merit_penalty) for s in prefix], dtype=float
),
"violations_prefix": np.asarray(violations, dtype=float),
},
offending_step=int(prefix[-1].step_count),
)
def _result_name_safe(result: Any) -> str:
"""Wrap :func:`slsqp_jax.diagnostics.report._result_name` for use here.
Imported lazily to avoid a circular import: ``signals`` is
imported by ``__init__`` before ``report``.
"""
from slsqp_jax.diagnostics.report import _result_name
return _result_name(result)
# ---------------------------------------------------------------------------
# Registry installation
# ---------------------------------------------------------------------------
# Register all 8 starter signals at import time so the runner picks
# them up by default. Test code can re-register an alternative
# evaluator under the same ``name`` to shift thresholds without
# editing this module.
register_evaluator(
"eq_jacobian_rank_deficient",
specificity="specific",
flavour="per_step",
evaluator=_eval_eq_jacobian_rank_deficient,
)
register_evaluator(
"lbfgs_conditioning_extreme",
specificity="ambiguous",
flavour="per_step",
evaluator=_eval_lbfgs_conditioning_extreme,
)
register_evaluator(
"multiplier_recovery_noise",
specificity="specific",
flavour="end_of_run",
evaluator=_eval_multiplier_recovery_noise,
)
register_evaluator(
"line_search_collapse",
specificity="ambiguous",
flavour="end_of_run",
evaluator=_eval_line_search_collapse,
)
register_evaluator(
"qp_budget_or_pingpong",
specificity="generic",
flavour="end_of_run",
evaluator=_eval_qp_budget_or_pingpong,
)
register_evaluator(
"merit_oscillation",
specificity="generic",
flavour="end_of_run",
evaluator=_eval_merit_oscillation,
)
register_evaluator(
"lpeca_overpredicting",
specificity="ambiguous",
flavour="end_of_run",
evaluator=_eval_lpeca_overpredicting,
)
register_evaluator(
"infeasible_termination",
specificity="specific",
flavour="end_of_run",
evaluator=_eval_infeasible_termination,
)
register_evaluator(
"divergence_rollback_triggered",
specificity="specific",
flavour="end_of_run",
evaluator=_eval_divergence_rollback_triggered,
)
register_evaluator(
"merit_penalty_explosion",
specificity="specific",
flavour="end_of_run",
evaluator=_eval_merit_penalty_explosion,
)
register_evaluator(
"penalty_starvation",
specificity="specific",
flavour="end_of_run",
evaluator=_eval_penalty_starvation,
)
__all__ = [
"Confidence",
"EndOfRunEvaluator",
"END_OF_RUN_EVALUATORS",
"EvalContext",
"Magnitude",
"PER_STEP_EVALUATORS",
"PerStepEvaluator",
"SIGNAL_REGISTRY",
"Signal",
"SignalRegistration",
"Specificity",
"register_evaluator",
]