"""Simple SecureAggregator for OpenGrowth MVP.

This is a lightweight, production-friendly utility that collects numeric
samples and computes a mean with a 95% confidence interval using a
t-distribution. It's intentionally small and dependency-free to ensure
reliable packaging and easy reasoning during tests.
"""

from math import sqrt
from typing import List

# Two-sided 95% critical values of Student's t-distribution, keyed by
# degrees of freedom (df = n - 1), rounded to three decimals as in the
# standard published tables.
_T_CRITICAL_95 = {
    1: 12.706, 2: 4.303, 3: 3.182, 4: 2.776, 5: 2.571,
    6: 2.447, 7: 2.365, 8: 2.306, 9: 2.262, 10: 2.228,
    11: 2.201, 12: 2.179, 13: 2.160, 14: 2.145, 15: 2.131,
    16: 2.120, 17: 2.110, 18: 2.101, 19: 2.093, 20: 2.086,
    21: 2.080, 22: 2.074, 23: 2.069, 24: 2.064, 25: 2.060,
    26: 2.056, 27: 2.052, 28: 2.048, 29: 2.045,
}

# Normal-approximation critical value (z-score) for a two-sided 95% CI,
# used once df exceeds the range covered by the table above.
_Z_CRITICAL_95 = 1.959964


class SecureAggregator:
    """Compute mean and 95% CI for a stream of numbers.

    - Samples are stored locally (no data is transmitted in this MVP).
    - The CI is computed using the t-distribution with df = n - 1 for
      df <= 29 and the normal approximation for larger samples.
    - For n < 2 the interval has zero width (both bounds equal the mean).

    NOTE: the critical values are fixed at the two-sided 95% level.
    ``confidence`` is validated and stored on the instance, but it does not
    currently influence the interval returned by ``aggregate``.
    """

    def __init__(self, confidence: float = 0.95):
        """Create an empty aggregator.

        Args:
            confidence: Confidence level, strictly between 0 and 1.

        Raises:
            ValueError: If ``confidence`` is not in the open interval (0, 1).
        """
        if not (0 < confidence < 1):
            raise ValueError("confidence must be between 0 and 1")
        self.confidence = confidence
        self._samples: List[float] = []

    def add_sample(self, value: float) -> None:
        """Record one numeric sample (stored as ``float``).

        Raises:
            TypeError: If ``value`` is not an ``int`` or ``float``.
        """
        if not isinstance(value, (int, float)):
            raise TypeError("sample value must be numeric")
        self._samples.append(float(value))

    def clear(self) -> None:
        """Discard all recorded samples."""
        self._samples.clear()

    def _mean(self) -> float:
        """Return the arithmetic mean of the recorded samples.

        Raises:
            ValueError: If no samples have been recorded.
        """
        if not self._samples:
            raise ValueError("no samples available to compute mean")
        return sum(self._samples) / len(self._samples)

    def _std_err(self) -> float:
        """Return the standard error of the mean, or 0.0 when n < 2."""
        n = len(self._samples)
        if n < 2:
            return 0.0
        mean = self._mean()
        # Sample variance with Bessel's correction:
        # sum((x - mean)^2) / (n - 1)
        var = sum((x - mean) ** 2 for x in self._samples) / (n - 1)
        sd = sqrt(var)
        return sd / sqrt(n)

    def aggregate(self) -> dict:
        """Return a dict with the mean and the 95% CI bounds.

        Example: {"mean": 12.3, "ci_low": 11.1, "ci_high": 13.5}

        With exactly one sample there is no spread to estimate, so both
        ``ci_low`` and ``ci_high`` equal the mean.

        Raises:
            ValueError: If no samples have been recorded.
        """
        if not self._samples:
            raise ValueError("no samples to aggregate")

        mean = self._mean()
        n = len(self._samples)
        if n < 2:
            return {"mean": mean, "ci_low": mean, "ci_high": mean}

        # The table is keyed by degrees of freedom, so look up n - 1 (not n).
        # Beyond the table fall back to the normal approximation.
        t_value = _T_CRITICAL_95.get(n - 1, _Z_CRITICAL_95)
        half_width = t_value * self._std_err()
        return {
            "mean": mean,
            "ci_low": mean - half_width,
            "ci_high": mean + half_width,
        }