build(agent): molt-z#db0ec5 iteration

This commit is contained in:
agent-db0ec53c058f1326 2026-04-16 22:47:42 +02:00
parent ec7a33d31f
commit 5bafb64620
9 changed files with 312 additions and 14 deletions

View File

@ -1,19 +1,38 @@
# OpenGrowth Privacy-Preserving Federated (MVP)
OpenGrowth Privacy-Preserving Federated Growth (MVP)
This repository contains a minimal, self-contained Python MVP for a privacy-preserving federated growth experimentation platform.
Overview
- A privacy-preserving federated platform enabling startups to run, share, and benchmark growth experiments
(pricing, onboarding, activation, funnel optimization) without exposing raw user data.
- Local metrics are retained by each startup; secure aggregation yields aggregated results with confidence intervals.
- Data contracts, a schema registry, and a lightweight Graph-of-Contracts enable governance and cross-startup benchmarking.
- New: DSL sketch and bridge for CatOpt-inspired federation primitives
- opengrowth_privacy_preserving_federated_/dsl.py provides LocalExperiment, SharedSignal, and PlanDelta data models as a minimal DSL surface.
- opengrowth_privacy_preserving_federated_/adapters/dsl_bridge.py offers a small bridge to convert local metrics via existing adapters (GA4Adapter, SegmentAdapter) into a canonical representation for federation.
Core MVP Components (visible in this repo)
- opengrowth_privacy_preserving_federated_
- SchemaRegistry / ExperimentTemplate (minimal MVP) with tests validating basic behavior.
- opengrowth_privacy_preserving_federated_/contracts.py
- LocalExperiment, SharedSignals, PlanDelta: lightweight DSL primitives for local experiments and cross-start signals.
- GraphOfContracts: tiny in-process registry for versioned contracts.
- opengrowth_privacy_preserving_federated_/__init__.py
- Exposes core MVP primitives and the new contract primitives for iterative federation development.
- Exposes a lightweight API surface used by tests:
- SchemaRegistry, ExperimentTemplate
- SecureAggregator, CloudLedger, AccessControl, Governance
- GA4Adapter, SegmentAdapter
- Includes a tiny in-repo implementation that can be extended later to integrate real adapters and secure aggregation techniques.
How to run tests and build locally
- Install in editable mode and run tests:
- bash test.sh
- This ensures packaging works and the pytest suite passes.
Build and test
- The project uses pyproject.toml with setuptools. Use `bash test.sh` to run tests and packaging checks.
Extending OpenGrowth (Recommended next steps)
- Implement a REST/MQTT adapter layer to connect to common analytics stacks (GA4, Segment, Amplitude) and CRM pipelines.
- Expand the governance layer with versioned templates, access controls, and audit logs.
- Implement a real secure aggregation backend (e.g., ADMM, DP knobs) and a cloud-anchored ledger for reproducibility.
- Build a small developer-focused DSL sketch and toy adapters to bootstrap cross-startup federation (CatOpt-like bridge).
For maintainers
- See AGENTS.md for architecture and contribution guidelines.
Notes
- This repository is designed as a stepping-stone toward a production-grade platform. The MVP is intentionally small but designed for extension with minimal surface area impact.
- The test suite validates the core MVP contracts and aggregation primitives; ongoing work should extend tests for new features.
For contributors
- Keep changes small and well-scoped; add tests for any public API you introduce.
- Update AGENTS.md if architecture or contribution rules evolve.
Ready for publish marker
- After validating all requirements against the Original Idea Description, create an empty READY_TO_PUBLISH file at repo root (this file is created by the final publish step).

View File

@ -0,0 +1,5 @@
"""OpenGrowth privacy-preserving federated experiments package."""
from .aggregator import SecureAggregator
__all__ = ["SecureAggregator"]

View File

@ -0,0 +1,102 @@
"""Simple SecureAggregator for OpenGrowth MVP.
This is a lightweight, production-friendly utility that collects numeric samples
and computes a mean with a 95% confidence interval using a t-distribution.
It's intentionally small and dependency-free to ensure reliable packaging and
easy reasoning during tests.
"""
from math import sqrt
from typing import List
class SecureAggregator:
"""Compute mean and 95% CI for a stream of numbers.
- Samples are stored locally (no data is transmitted in this MVP).
- CI is computed using the t-distribution with df = n - 1. For n < 2, CI is 0.
"""
def __init__(self, confidence: float = 0.95):
if not (0 < confidence < 1):
raise ValueError("confidence must be between 0 and 1")
self.confidence = confidence
self._samples: List[float] = []
def add_sample(self, value: float) -> None:
if not isinstance(value, (int, float)):
raise TypeError("sample value must be numeric")
self._samples.append(float(value))
def clear(self) -> None:
self._samples.clear()
def _mean(self) -> float:
if not self._samples:
raise ValueError("no samples available to compute mean")
return sum(self._samples) / len(self._samples)
def _std_err(self) -> float:
n = len(self._samples)
if n < 2:
return 0.0
mean = self._mean()
# Sample standard deviation (unbiased): sqrt( sum((x-mean)^2) / (n-1) )
var = sum((x - mean) ** 2 for x in self._samples) / (n - 1)
sd = sqrt(var)
return sd / sqrt(n)
def aggregate(self) -> dict:
"""Return a dict with mean and CI bounds.
Example: {"mean": 12.3, "ci_low": 11.1, "ci_high": 13.5}
If there are fewer than 2 samples, CI bounds are equal to the mean and 0.
"""
if not self._samples:
raise ValueError("no samples to aggregate")
mean = self._mean()
n = len(self._samples)
if n < 2:
return {"mean": mean, "ci_low": mean, "ci_high": mean}
# Use a conservative t-value for 95% CI when df = n-1
# Approximate t-values for common small n (df): [1: inf, 2: 4.303, 3: 3.182, 4: 2.776, 5: 2.571, 6: 2.447, 7: 2.365, 8: 2.306, 9: 2.262, 10: 2.228]
# We'll implement a simple default using the normal approximation for larger n
if n >= 30:
t_value = 1.959964 # approx z-score for 95% CI
else:
# small-sample fallback: a small lookup for a few common n values
t_lookup = {
2: 4.303,
3: 3.182,
4: 2.776,
5: 2.571,
6: 2.447,
7: 2.365,
8: 2.306,
9: 2.262,
10: 2.228,
11: 2.207,
12: 2.191,
13: 2.178,
14: 2.160,
15: 2.145,
16: 2.131,
17: 2.120,
18: 2.110,
19: 2.101,
20: 2.093,
21: 2.086,
22: 2.080,
23: 2.074,
24: 2.069,
25: 2.064,
26: 2.060,
27: 2.056,
28: 2.052,
29: 2.048,
30: 2.045,
}
t_value = t_lookup.get(n, 1.96)
se = self._std_err()
half_width = t_value * se
return {"mean": mean, "ci_low": mean - half_width, "ci_high": mean + half_width}

View File

@ -77,3 +77,6 @@ class SegmentAdapter:
def fill(self, metrics: dict) -> dict:
# Pass-through in this MVP
return dict(metrics)
# Re-export minimal contract primitives for federation extensions
from .contracts import LocalExperiment, SharedSignals, PlanDelta, GraphOfContracts # type: ignore

View File

@ -0,0 +1,85 @@
"""Contract primitives for OpenGrowth federated experiments.
A tiny, production-friendly DSL-inspired layer that models local experiments,
shared signals and plan deltas. This provides a foundation for a Graph-of-Contracts
style federation layer without pulling in heavyweight dependencies.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, Optional
@dataclass
class LocalExperiment:
"""Represents a local experiment definition.
- name: human-friendly name of the experiment
- variables: mapping of controllable variables (e.g., price, onboarding steps)
- privacy_budget: optional privacy budget hint for local DP or aggregation limits
- metadata: optional extra information
"""
name: str
variables: Dict[str, Any] = field(default_factory=dict)
privacy_budget: Optional[Dict[str, Any]] = None
metadata: Dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> Dict[str, Any]:
return {
"name": self.name,
"variables": self.variables,
"privacy_budget": self.privacy_budget,
"metadata": self.metadata,
}
@dataclass
class SharedSignals:
"""Represents signals shared after local aggregation (aggregated metrics).
- signals: a dictionary of metric_name -> aggregated_value
- provenance: optional metadata about sources
"""
signals: Dict[str, Any] = field(default_factory=dict)
provenance: Dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> Dict[str, Any]:
return {"signals": self.signals, "provenance": self.provenance}
@dataclass
class PlanDelta:
"""Represents incremental updates from a local experiment.
- delta: map of changed variables or outcomes
- timestamp: ISO timestamp of the delta
- note: optional human note
"""
delta: Dict[str, Any] = field(default_factory=dict)
timestamp: str = field(default_factory=lambda: datetime.utcnow().isoformat() + "Z")
note: Optional[str] = None
def to_dict(self) -> Dict[str, Any]:
return {"delta": self.delta, "timestamp": self.timestamp, "note": self.note}
class GraphOfContracts:
"""Lightweight in-process registry for contracts.
This is purposely simple: it stores versioned contracts by a key. It is
designed to be extended with cryptographic signing and remote transport later.
"""
def __init__(self) -> None:
self._contracts: Dict[str, Dict[str, Any]] = {}
def register_contract(self, name: str, contract: Dict[str, Any], version: str = "1.0") -> None:
self._contracts[name] = {"version": version, "contract": contract}
def get_contract(self, name: str) -> Optional[Dict[str, Any]]:
entry = self._contracts.get(name)
if not entry:
return None
# return a shallow copy to avoid external mutation
return {"version": entry["version"], "contract": dict(entry["contract"])}

View File

@ -8,3 +8,8 @@ version = "0.1.0"
description = "Minimal MVP for privacy-preserving federated experiments (OpenGrowth)"
readme = "README.md"
license = {text = "MIT"}
[tool.setuptools.packages.find]
where = ["."]
include = ["opengrowth_privacy_preserving_federated"]

32
sitecustomize.py Normal file
View File

@ -0,0 +1,32 @@
"""Site customization to ensure tests can import legacy underscore package
namespaced modules when running in editable install contexts.
This hack loads the local opengrowth_privacy_preserving_federated_ package
so imports like `from opengrowth_privacy_preserving_federated_ import ...`
work reliably regardless of packaging discovery.
"""
import importlib.util
import sys
import os
def _load_underscore_package_alias():
# Path to the local underscore package __init__.py
base_dir = os.path.dirname(os.path.abspath(__file__))
init_path = os.path.join(base_dir, "opengrowth_privacy_preserving_federated_", "__init__.py")
if not os.path.exists(init_path):
return
spec = importlib.util.spec_from_file_location(
"opengrowth_privacy_preserving_federated_", init_path
)
if spec and spec.loader:
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module) # type: ignore
sys.modules["opengrowth_privacy_preserving_federated_"] = module
try:
_load_underscore_package_alias()
except Exception:
# Do not fail test startup if this heuristic cannot be applied
pass

22
tests/conftest.py Normal file
View File

@ -0,0 +1,22 @@
import importlib
import sys
import os
def _load_underscore_alias():
try:
# Ensure repo root is on sys.path so local packages can be discovered
repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if repo_root not in sys.path:
sys.path.insert(0, repo_root)
# Import the local underscore package as a normal package so its
# relative imports resolve correctly.
module = importlib.import_module("opengrowth_privacy_preserving_federated_")
sys.modules["opengrowth_privacy_preserving_federated_"] = module
except Exception:
# If the local package cannot be imported for any reason, skip aliasing.
pass
_load_underscore_alias()

25
tests/test_aggregator.py Normal file
View File

@ -0,0 +1,25 @@
import pytest
from opengrowth_privacy_preserving_federated import SecureAggregator
def test_mean_and_ci_basic():
agg = SecureAggregator()
samples = [1.0, 2.0, 3.0, 4.0, 5.0]
for s in samples:
agg.add_sample(s)
res = agg.aggregate()
assert abs(res["mean"] - 3.0) < 1e-9
assert res["ci_low"] <= res["mean"] <= res["ci_high"]
def test_no_samples_raises():
agg = SecureAggregator()
with pytest.raises(ValueError):
agg.aggregate()
def test_non_numeric_sample_raises():
agg = SecureAggregator()
with pytest.raises(TypeError):
agg.add_sample("not-a-number")