# Source code for qube.memory_budget

"""Predict QUBE pipeline memory consumption from basis dimensions.

Companion to the algebraic memory analysis in Paper I §6 — analogous to
ECLIPSE_Memory.f90 but covering both QUBE pipeline paths.

Two paths are supported:

- **harmonic**: Tegmark-style V projection + SMW kernel. Persistent state
  is dominated by ``L`` (n_pix²), ``V_N_inv`` (n_modes × n_pix), and
  ``V_Ninv_VT`` (n_modes²).
- **pixel-direct**: Gjerløw-style direct pixel-space C^{-1}. No V, no
  SMW. Persistent state is dominated by ``Cov_T`` (Core retains it; Core
  only nullifies ``noise_cov1`` for non-pixel-direct modes) and
  ``basis._N`` (asfortranarray F-order copy).

For each path the calculator splits stage cost into:

- **Persistent state** at stage exit (audit-backed, ~1% accurate at the
  calibration points below).
- **Transient state** at stage peak (modelled where the algebra is
  known; assumed simultaneously alive at the peak — conservative when
  transients fire sequentially).

``StageBudget.peak_bytes = persistent_bytes + transient_bytes``, where each
term is the sum over that stage's corresponding dict of entries.

Allocator overhead, BLAS workspace, and the empirical
``fisher.compute.derivative_cache`` transient (queue item E for the
harmonic path; ~``n_params × n_pix²`` for the pixel-direct path) are not
modelled in detail. Expect ~5–10% allocator/BLAS slack at production
scale, and an additive Python/numpy overhead floor (~0.2–0.5 GiB) that
dominates at small ``n_pix``.

Calibration points:

- **Harmonic** at eclipse-QU (n_pix=59136, n_modes=33274, lmax_signal=256,
  lmax=128; mem_eclipse_qu_20114986.out, commit d11ab0b):
  basis_setup persistent 57.21 GiB predicted vs 57.80 GiB measured;
  basis_setup peak 112.6 GiB predicted vs 113.2 GiB measured.
- **Pixel-direct** at QU_nside64 fsky=0.1 (n_pix=9800, lmax=128,
  basis_lmax=256 default → switch implicit; mem_nc_20103507.out, commit
  ccabffd): basis_setup persistent (above covariance) 1.43 GiB predicted
  vs 1.44 GiB measured.
"""

from dataclasses import dataclass, field
from typing import Literal

# Bytes in one gibibyte; divisor used when rendering sizes for the report.
GIBIBYTE: int = 2**30
# Bytes per double-precision matrix element; every size term is a count × this.
_BYTES_PER_DOUBLE: int = 8


[docs] @dataclass(frozen=True) class BudgetConfig: """Inputs to the QUBE harmonic-path budget. n_pix: total pixel count (2 × n_pix_observed for QU spin-2; n_pix_observed for T spin-0; sum for TQU). n_modes: total mode count after V projection. The calculator does not derive this from lmax_signal because mode counting depends on spin and m=0 exclusions. lmax_signal: signal-cov ceiling (Layer A; max ℓ in V), not the parameter-grid lmax. lmax: inference window upper (Layer B). None or equal to ``lmax_signal`` disables the switch optimisation. Below ``lmax_signal`` activates ``_compute_effective_noise`` and adds ``T`` (separate from ``V_Ninv_VT``) plus ``S_fixed`` and the corr intermediate to basis_setup. release_pixel_projector: True matches PR #16's Fisher path (V freed after SMW build). False reproduces the legacy keep-V behaviour. """ n_pix: int n_modes: int lmax_signal: int lmax: int | None = None release_pixel_projector: bool = True def __post_init__(self) -> None: if self.n_pix <= 0: raise ValueError(f"n_pix must be positive (got {self.n_pix})") if self.n_modes <= 0: raise ValueError(f"n_modes must be positive (got {self.n_modes})") if self.lmax_signal <= 0: raise ValueError(f"lmax_signal must be positive (got {self.lmax_signal})") if self.lmax is not None and self.lmax > self.lmax_signal: raise ValueError(f"lmax={self.lmax} exceeds lmax_signal={self.lmax_signal}") @property def has_switch(self) -> bool: return self.lmax is not None and self.lmax < self.lmax_signal
@dataclass(frozen=True)
class PixelDirectBudgetConfig:
    """Inputs to the QUBE pixel-direct path budget.

    n_pix: total pixel count (same convention as BudgetConfig).
    lmax_signal: signal-cov ceiling (Layer A). Used for the auto-picker
        informational fields and to decide whether the implicit-switch
        S_fixed transient is allocated by Core (lmax_signal > params.lmax
        → switch implicit).
    n_bins: number of bandpower bins in the analysis. Drives the
        per-parameter ``cinv_times_dcb`` dict size during fisher_run.
    n_params: number of derivative parameters. ``n_bins × n_spectra`` in
        practice (e.g. n_bins=6 × 3 spectra = 18 at eclipse-QU). Used for
        the empirical fisher_run derivative-product transient.
    has_switch: True if Core's ``setup_computation_basis`` enters the
        S_fixed branch before discovering the path is pixel-direct (i.e.
        params.lmax < lmax_signal). At default benchmark configs
        (lmax_signal=4·nside, params.lmax=2·nside) this is True. The
        S_fixed buffer is allocated, populated, then dereferenced — but
        the allocator pool keeps it resident through basis_setup exit.

    Raises:
        ValueError: if any of ``n_pix``, ``lmax_signal``, ``n_bins`` or
            ``n_params`` is not positive.
    """

    n_pix: int
    lmax_signal: int
    n_bins: int
    n_params: int
    has_switch: bool = True

    def __post_init__(self) -> None:
        # Walk the integer fields in declaration order so the first
        # offending one is the one reported.
        for attr in ("n_pix", "lmax_signal", "n_bins", "n_params"):
            value = getattr(self, attr)
            if value <= 0:
                raise ValueError(f"{attr} must be positive (got {value})")
@dataclass
class StageBudget:
    """Memory cost for one pipeline stage.

    name: stage identifier.
    persistent: size in bytes of each term counted as persistent state,
        keyed by a descriptive label.
    transient: size in bytes of each term counted as transient state,
        keyed by a descriptive label.
    """

    name: str
    persistent: dict[str, int] = field(default_factory=dict)
    transient: dict[str, int] = field(default_factory=dict)

    @property
    def persistent_bytes(self) -> int:
        """Sum of all persistent terms, in bytes."""
        held = self.persistent.values()
        return sum(held)

    @property
    def transient_bytes(self) -> int:
        """Sum of all transient terms, in bytes."""
        scratch = self.transient.values()
        return sum(scratch)

    @property
    def peak_bytes(self) -> int:
        """Persistent plus transient bytes (all transients counted together)."""
        total = self.persistent_bytes
        total += self.transient_bytes
        return total
@dataclass
class QUBEBudget:
    """Full pipeline budget across all four QUBE stages.

    config: the configuration the budget was predicted from.
    stages: per-stage budgets, in pipeline order.
    path: which pipeline path the budget describes.
    """

    config: BudgetConfig | PixelDirectBudgetConfig
    stages: list[StageBudget]
    path: Literal["harmonic", "pixel_direct"] = "harmonic"

    @property
    def lifetime_peak_bytes(self) -> int:
        """Largest ``peak_bytes`` over all stages; 0 when there are no stages."""
        # default=0 keeps an empty budget from raising max()'s opaque
        # "empty sequence" ValueError.
        return max((s.peak_bytes for s in self.stages), default=0)

    def stage(self, name: str) -> StageBudget:
        """Return the first stage whose ``name`` matches.

        Raises:
            KeyError: if no stage has that name.
        """
        for s in self.stages:
            if s.name == name:
                return s
        raise KeyError(f"unknown stage {name!r}")

    def format_table(self) -> str:
        """Render this budget as a text table via ``_format_table``."""
        return _format_table(self)
def predict_qube_budget(config: BudgetConfig) -> QUBEBudget:
    """Predict QUBE memory budget on the harmonic path.

    Builds the four stage budgets (covariance_setup, basis_setup,
    fisher_run, spectra_run) from ``config.n_pix`` and ``config.n_modes``
    and returns them wrapped in a ``QUBEBudget`` with ``path="harmonic"``.
    """
    n_pix = config.n_pix
    n_modes = config.n_modes
    # Dense double-precision matrix sizes, in bytes.
    pix_sq = n_pix * n_pix * _BYTES_PER_DOUBLE
    mode_sq = n_modes * n_modes * _BYTES_PER_DOUBLE
    mode_pix = n_modes * n_pix * _BYTES_PER_DOUBLE

    switch_on = config.has_switch
    release_v = config.release_pixel_projector

    held: dict[str, int] = {
        "L (Cholesky factor of N, in-place)": pix_sq,
        "V_N_inv": mode_pix,
        "V_Ninv_VT (M kernel)": mode_sq,
    }
    scratch: dict[str, int] = {}

    if switch_on:
        # _noise_cov_T diverges from _V_Ninv_VT only on the switch path; the
        # no-switch path aliases the buffer (harmonic.py:322).
        held["T (noise-bias kernel, switch path)"] = mode_sq

    # V is either a transient (released) or part of the persistent state.
    if release_v:
        scratch["V (transient before release)"] = mode_pix
    else:
        held["V (pixel projector)"] = mode_pix

    if switch_on:
        scratch["S_fixed (switch optimisation)"] = pix_sq
        scratch["corr intermediate (V_N_inv @ S_fixed)"] = mode_pix

    stages = [
        StageBudget(
            name="covariance_setup",
            persistent={"Cov_T": pix_sq},
            transient={"Cov_T (asfortranarray copy on read)": pix_sq},
        ),
        StageBudget(name="basis_setup", persistent=held, transient=scratch),
        # Later stages get their own copies so the dicts stay independent.
        StageBudget(name="fisher_run", persistent=held.copy()),
        StageBudget(
            name="spectra_run",
            persistent={**held, "noise_cov_T (Spectra)": mode_sq},
        ),
    ]
    return QUBEBudget(config=config, stages=stages, path="harmonic")
def predict_pixel_direct_budget(config: PixelDirectBudgetConfig) -> QUBEBudget:
    """Predict QUBE memory budget on the pixel-direct path.

    Every term is a multiple of one dense ``n_pix × n_pix`` double matrix.
    Returns a ``QUBEBudget`` with ``path="pixel_direct"`` holding the
    covariance_setup, basis_setup, fisher_run and spectra_run stages.
    """
    pix_sq = config.n_pix * config.n_pix * _BYTES_PER_DOUBLE

    cov_stage = StageBudget(
        name="covariance_setup",
        persistent={"Cov_T (Core retains; not nullified for pixel-direct)": pix_sq},
        transient={"Cov_T (asfortranarray copy on read)": pix_sq},
    )

    # basis_setup adds two pix_sq terms above covariance_setup. The first
    # is structural (asfortranarray F-order copy at base.py:148). The
    # second only appears when Core's setup_computation_basis enters the
    # S_fixed allocation branch before discovering the path is
    # pixel-direct — the buffer is dereferenced at line 610 of core.py
    # but the allocator pool keeps it resident until basis_setup exit.
    basis_held: dict[str, int] = {
        "Cov_T (carried from covariance_setup)": pix_sq,
        "basis._N (asfortranarray F-order copy)": pix_sq,
    }
    if config.has_switch:
        basis_held["S_fixed (allocator pool retained)"] = pix_sq
    basis_stage = StageBudget(name="basis_setup", persistent=basis_held)

    # fisher_run: C_inv (full pixel-space inverse) plus the cinv_times_dcb
    # dict of n_params dense n_pix² products held through trace_loop.
    fisher_held = {
        **basis_held,
        "_direct_pix_buffer (lazy on first derivative)": pix_sq,
    }
    fisher_stage = StageBudget(
        name="fisher_run",
        persistent=fisher_held,
        transient={
            "C_inv (basis_manager.get_projected_inverse)": pix_sq,
            "cinv_times_dcb (n_params dense pixel matrices)": config.n_params * pix_sq,
        },
    )

    # Spectra noise-bias path: noise_cov_w = C_bar_inv @ N_bar @ C_bar_inv
    # is n_pix² in pixel-direct (no compression to compress to). Per-bin
    # derivative products are recomputed from C_inv and dC_b similarly to
    # Fisher — same transient shape but bounded by spectra parameter count.
    spectra_held = {**fisher_held, "noise_cov_w (Spectra)": pix_sq}

    # Spectra recomputes derivatives when cache_derivatives=False — peak
    # transient empirically scales with n_bins (per-bin C^{-1} dC products
    # held during the inner loop). The ceiling here over-predicts measured
    # spectra peaks by ~20% at production scales; that's the acceptable
    # bound for cluster sizing.
    spectra_stage = StageBudget(
        name="spectra_run",
        persistent=spectra_held,
        transient={
            "C_inv per parameter point": pix_sq,
            "per-bin C^{-1} dC products": config.n_bins * pix_sq,
        },
    )

    return QUBEBudget(
        config=config,
        stages=[cov_stage, basis_stage, fisher_stage, spectra_stage],
        path="pixel_direct",
    )
def _format_bytes(b: int) -> str:
    """Render a byte count as GiB with two decimals, right-aligned to width 9."""
    return f"{b / GIBIBYTE:9.2f} GiB"


def _format_table(budget: QUBEBudget) -> str:
    """Render ``budget`` as a multi-line text report.

    The report has a title line naming the path, a header line echoing the
    configuration, one section per stage (persistent total, peak, then the
    individual persistent and transient terms), and two closing lines with
    the lifetime peak and the list of unmodelled contributions.
    """
    cfg = budget.config
    # The header differs by config type because the two paths take
    # different inputs.
    if isinstance(cfg, BudgetConfig):
        switch_str = f"lmax={cfg.lmax}" if cfg.lmax is not None else "no switch"
        header = (
            f" n_pix={cfg.n_pix} n_modes={cfg.n_modes} lmax_signal={cfg.lmax_signal}"
            f" {switch_str} release_V={cfg.release_pixel_projector}"
        )
    else:
        switch_str = "switch implicit" if cfg.has_switch else "no switch"
        header = (
            f" n_pix={cfg.n_pix} lmax_signal={cfg.lmax_signal}"
            f" n_bins={cfg.n_bins} n_params={cfg.n_params} {switch_str}"
        )

    lines = [
        f"QUBE memory budget [{budget.path}]",
        header,
        "",
    ]
    name_width = 48  # column width reserved for the term label
    for stage in budget.stages:
        lines.append(
            f"[{stage.name}] persistent={_format_bytes(stage.persistent_bytes)}"
            f" peak={_format_bytes(stage.peak_bytes)}"
        )
        if stage.persistent:
            lines.append(" persistent:")
            for term, sz in stage.persistent.items():
                lines.append(f" {term:<{name_width}}{_format_bytes(sz)}")
        if stage.transient:
            lines.append(" transient (live at stage peak):")
            for term, sz in stage.transient.items():
                lines.append(f" {term:<{name_width}}{_format_bytes(sz)}")
        lines.append("")

    lines.append(
        f"Lifetime peak: {_format_bytes(budget.lifetime_peak_bytes)}"
        " (above Python baseline RSS, ~25–30 GiB at typical configs)"
    )
    lines.append(
        "Excludes: BLAS scratch, allocator overhead, derivative_cache transient"
        " (~5 × n_modes² × 8 B at eclipse-QU)."
    )
    return "\n".join(lines)


def _main() -> None:  # pragma: no cover - CLI entry point
    """Command-line entry point: parse arguments and print the budget table.

    Path-specific required options are checked after parsing. Values that
    the config classes reject with ``ValueError`` are reported through
    ``parser.error`` (usage message and exit status 2), not as a traceback.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Predict QUBE pipeline memory consumption from basis dimensions",
    )
    parser.add_argument(
        "--path",
        choices=["harmonic", "pixel_direct"],
        default="harmonic",
        help="QUBE pipeline path to model",
    )
    parser.add_argument(
        "--n-pix", type=int, required=True, help="total pixel count (Q+U for spin-2)"
    )
    parser.add_argument(
        "--lmax-signal", type=int, required=True, help="signal-cov ceiling (Layer A)"
    )
    # Harmonic-only
    parser.add_argument(
        "--n-modes", type=int, default=None, help="(harmonic) total mode count after V"
    )
    parser.add_argument(
        "--lmax",
        type=int,
        default=None,
        help="(harmonic) inference window upper (Layer B); omit to disable switch",
    )
    parser.add_argument(
        "--keep-pixel-projector",
        action="store_true",
        help="(harmonic) legacy: do not release V after SMW build",
    )
    # Pixel-direct only
    parser.add_argument(
        "--n-bins", type=int, default=None, help="(pixel_direct) number of bandpower bins"
    )
    parser.add_argument(
        "--n-params",
        type=int,
        default=None,
        help="(pixel_direct) total derivative parameter count",
    )
    parser.add_argument(
        "--no-switch",
        action="store_true",
        help="(pixel_direct) Core did not allocate S_fixed transient",
    )
    args = parser.parse_args()

    if args.path == "harmonic":
        if args.n_modes is None:
            parser.error("--n-modes is required for --path harmonic")
        try:
            config = BudgetConfig(
                n_pix=args.n_pix,
                n_modes=args.n_modes,
                lmax_signal=args.lmax_signal,
                lmax=args.lmax,
                release_pixel_projector=not args.keep_pixel_projector,
            )
        except ValueError as exc:
            # parser.error exits, so config is always bound past this point.
            parser.error(str(exc))
        print(predict_qube_budget(config).format_table())
    else:
        if args.n_bins is None or args.n_params is None:
            parser.error("--n-bins and --n-params are required for --path pixel_direct")
        try:
            config = PixelDirectBudgetConfig(
                n_pix=args.n_pix,
                lmax_signal=args.lmax_signal,
                n_bins=args.n_bins,
                n_params=args.n_params,
                has_switch=not args.no_switch,
            )
        except ValueError as exc:
            parser.error(str(exc))
        print(predict_pixel_direct_budget(config).format_table())


if __name__ == "__main__":
    _main()