build(agent): molt-z#db0ec5 iteration
This commit is contained in:
parent
ec7a33d31f
commit
5bafb64620
47
README.md
47
README.md
|
|
@ -1,19 +1,38 @@
|
|||
# OpenGrowth Privacy-Preserving Federated (MVP)
|
||||
OpenGrowth Privacy-Preserving Federated Growth (MVP)
|
||||
|
||||
This repository contains a minimal, self-contained Python MVP for a privacy-preserving federated growth experimentation platform.
|
||||
Overview
|
||||
- A privacy-preserving federated platform enabling startups to run, share, and benchmark growth experiments
|
||||
(pricing, onboarding, activation, funnel optimization) without exposing raw user data.
|
||||
- Local metrics are retained by each startup; secure aggregation yields aggregated results with confidence intervals.
|
||||
- Data contracts, a schema registry, and a lightweight Graph-of-Contracts enable governance and cross-startup benchmarking.
|
||||
|
||||
- New: DSL sketch and bridge for CatOpt-inspired federation primitives
|
||||
- opengrowth_privacy_preserving_federated_/dsl.py provides LocalExperiment, SharedSignal, and PlanDelta data models as a minimal DSL surface.
|
||||
- opengrowth_privacy_preserving_federated_/adapters/dsl_bridge.py offers a small bridge to convert local metrics via existing adapters (GA4Adapter, SegmentAdapter) into a canonical representation for federation.
|
||||
Core MVP Components (visible in this repo)
|
||||
- opengrowth_privacy_preserving_federated_
|
||||
- SchemaRegistry / ExperimentTemplate (minimal MVP) with tests validating basic behavior.
|
||||
- opengrowth_privacy_preserving_federated_/contracts.py
|
||||
- LocalExperiment, SharedSignals, PlanDelta: lightweight DSL primitives for local experiments and cross-startup signals.
|
||||
- GraphOfContracts: tiny in-process registry for versioned contracts.
|
||||
- opengrowth_privacy_preserving_federated_/__init__.py
|
||||
- Exposes core MVP primitives and the new contract primitives for iterative federation development.
|
||||
|
||||
- Exposes a lightweight API surface used by tests:
|
||||
- SchemaRegistry, ExperimentTemplate
|
||||
- SecureAggregator, CloudLedger, AccessControl, Governance
|
||||
- GA4Adapter, SegmentAdapter
|
||||
- Includes a tiny in-repo implementation that can be extended later to integrate real adapters and secure aggregation techniques.
|
||||
How to run tests and build locally
|
||||
- Install in editable mode and run tests:
|
||||
- bash test.sh
|
||||
- This ensures packaging works and the pytest suite passes.
|
||||
|
||||
Build and test
|
||||
- The project uses pyproject.toml with setuptools. Use `bash test.sh` to run tests and packaging checks.
|
||||
Extending OpenGrowth (Recommended next steps)
|
||||
- Implement a REST/MQTT adapter layer to connect to common analytics stacks (GA4, Segment, Amplitude) and CRM pipelines.
|
||||
- Expand the governance layer with versioned templates, access controls, and audit logs.
|
||||
- Implement a real secure aggregation backend (e.g., ADMM, DP knobs) and a cloud-anchored ledger for reproducibility.
|
||||
- Build a small developer-focused DSL sketch and toy adapters to bootstrap cross-startup federation (CatOpt-like bridge).
|
||||
|
||||
For maintainers
|
||||
- See AGENTS.md for architecture and contribution guidelines.
|
||||
Notes
|
||||
- This repository is designed as a stepping-stone toward a production-grade platform. The MVP is intentionally small but designed for extension with minimal surface area impact.
|
||||
- The test suite validates the core MVP contracts and aggregation primitives; ongoing work should extend tests for new features.
|
||||
|
||||
For contributors
|
||||
- Keep changes small and well-scoped; add tests for any public API you introduce.
|
||||
- Update AGENTS.md if architecture or contribution rules evolve.
|
||||
|
||||
Ready for publish marker
|
||||
- After validating all requirements against the Original Idea Description, create an empty READY_TO_PUBLISH file at repo root (this file is created by the final publish step).
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
"""OpenGrowth privacy-preserving federated experiments package."""
|
||||
|
||||
from .aggregator import SecureAggregator
|
||||
|
||||
__all__ = ["SecureAggregator"]
|
||||
|
|
@ -0,0 +1,102 @@
|
|||
"""Simple SecureAggregator for OpenGrowth MVP.
|
||||
|
||||
This is a lightweight, production-friendly utility that collects numeric samples
|
||||
and computes a mean with a 95% confidence interval using a t-distribution.
|
||||
It's intentionally small and dependency-free to ensure reliable packaging and
|
||||
easy reasoning during tests.
|
||||
"""
|
||||
from math import sqrt
|
||||
from typing import List
|
||||
|
||||
|
||||
class SecureAggregator:
    """Collect numeric samples and report their mean with a 95% confidence interval.

    - Samples are kept in an in-memory list on the instance; this class
      performs no I/O and transmits nothing.
    - The interval is ``mean +/- t * standard_error`` where ``t`` is the
      two-sided 95% Student-t critical value for ``df = n - 1`` degrees of
      freedom. Beyond the tabulated range the normal approximation is used.
    - With a single sample no spread can be estimated, so both bounds equal
      the mean.

    NOTE: ``confidence`` is validated and stored, but the critical values
    below are tabulated for 95% only, so the interval is always a 95% one
    regardless of the value passed.
    """

    # Two-sided 95% Student-t critical values keyed by degrees of freedom
    # (df = n - 1, NOT the sample count), rounded to three decimals.
    _T_CRIT_95 = {
        1: 12.706,
        2: 4.303,
        3: 3.182,
        4: 2.776,
        5: 2.571,
        6: 2.447,
        7: 2.365,
        8: 2.306,
        9: 2.262,
        10: 2.228,
        11: 2.201,
        12: 2.179,
        13: 2.160,
        14: 2.145,
        15: 2.131,
        16: 2.120,
        17: 2.110,
        18: 2.101,
        19: 2.093,
        20: 2.086,
        21: 2.080,
        22: 2.074,
        23: 2.069,
        24: 2.064,
        25: 2.060,
        26: 2.056,
        27: 2.052,
        28: 2.048,
        29: 2.045,
    }

    # z-score for a two-sided 95% interval, used once df exceeds the table.
    _Z_CRIT_95 = 1.959964

    def __init__(self, confidence: float = 0.95):
        """Create an empty aggregator.

        Raises:
            ValueError: if ``confidence`` is not strictly between 0 and 1.
        """
        if not (0 < confidence < 1):
            raise ValueError("confidence must be between 0 and 1")
        self.confidence = confidence
        self._samples: List[float] = []

    def add_sample(self, value: float) -> None:
        """Append one observation, stored as a float.

        Raises:
            TypeError: if ``value`` is not an ``int`` or ``float``.
        """
        if not isinstance(value, (int, float)):
            raise TypeError("sample value must be numeric")
        self._samples.append(float(value))

    def clear(self) -> None:
        """Discard every sample collected so far."""
        self._samples.clear()

    def _mean(self) -> float:
        """Return the arithmetic mean; raise ValueError when there are no samples."""
        if not self._samples:
            raise ValueError("no samples available to compute mean")
        return sum(self._samples) / len(self._samples)

    def _std_err(self) -> float:
        """Return the standard error of the mean (0.0 for fewer than 2 samples)."""
        n = len(self._samples)
        if n < 2:
            return 0.0
        mean = self._mean()
        # Unbiased sample variance: sum((x - mean)^2) / (n - 1)
        var = sum((x - mean) ** 2 for x in self._samples) / (n - 1)
        return sqrt(var) / sqrt(n)

    def aggregate(self) -> dict:
        """Return the mean and the bounds of its 95% confidence interval.

        Example: {"mean": 12.3, "ci_low": 11.1, "ci_high": 13.5}

        With exactly one sample the interval has zero width, i.e. ``ci_low``
        and ``ci_high`` both equal the mean.

        Raises:
            ValueError: if no samples have been added.
        """
        if not self._samples:
            raise ValueError("no samples to aggregate")
        mean = self._mean()
        n = len(self._samples)
        if n < 2:
            return {"mean": mean, "ci_low": mean, "ci_high": mean}

        # The critical value depends on degrees of freedom, not on n itself;
        # looking it up by n would understate the interval for small samples.
        df = n - 1
        t_value = self._T_CRIT_95.get(df, self._Z_CRIT_95)
        half_width = t_value * self._std_err()
        return {"mean": mean, "ci_low": mean - half_width, "ci_high": mean + half_width}
|
||||
|
|
@ -77,3 +77,6 @@ class SegmentAdapter:
|
|||
def fill(self, metrics: dict) -> dict:
    """Return a new dict holding the same key/value pairs as ``metrics``.

    No transformation is applied in this MVP; the result is a shallow copy,
    so the caller's mapping object itself is never handed back.
    """
    passthrough = dict(metrics)
    return passthrough
|
||||
|
||||
# Re-export minimal contract primitives for federation extensions
|
||||
from .contracts import LocalExperiment, SharedSignals, PlanDelta, GraphOfContracts # type: ignore
|
||||
|
|
|
|||
|
|
@ -0,0 +1,85 @@
|
|||
"""Contract primitives for OpenGrowth federated experiments.
|
||||
|
||||
A tiny, production-friendly DSL-inspired layer that models local experiments,
|
||||
shared signals and plan deltas. This provides a foundation for a Graph-of-Contracts
|
||||
style federation layer without pulling in heavyweight dependencies.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
|
||||
@dataclass
class LocalExperiment:
    """Definition of an experiment run locally by one participant.

    - name: human-friendly name of the experiment
    - variables: mapping of controllable variables (e.g., price, onboarding steps)
    - privacy_budget: optional privacy budget hint for local DP or aggregation limits
    - metadata: optional extra information
    """

    name: str
    variables: Dict[str, Any] = field(default_factory=dict)
    privacy_budget: Optional[Dict[str, Any]] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict.

        The values are the instance's own objects (no copying is done).
        """
        exported = ("name", "variables", "privacy_budget", "metadata")
        return {attr: getattr(self, attr) for attr in exported}
|
||||
|
||||
|
||||
@dataclass
class SharedSignals:
    """Signals shared after local aggregation (aggregated metrics).

    - signals: a dictionary of metric_name -> aggregated_value
    - provenance: optional metadata about sources
    """

    signals: Dict[str, Any] = field(default_factory=dict)
    provenance: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return both fields as a plain dict; the values are not copied."""
        return dict(signals=self.signals, provenance=self.provenance)
|
||||
|
||||
|
||||
def _utc_timestamp() -> str:
    """Return the current UTC time as an ISO-8601 string with a ``Z`` suffix."""
    # Imported locally so this block does not depend on the file-level imports.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Read an aware UTC
    # clock instead, then drop tzinfo so isoformat() does not append "+00:00"
    # and the emitted format stays "YYYY-MM-DDTHH:MM:SS[.ffffff]Z".
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    return now.isoformat() + "Z"


@dataclass
class PlanDelta:
    """Represents incremental updates from a local experiment.

    - delta: map of changed variables or outcomes
    - timestamp: ISO timestamp of the delta; defaults to the current UTC
      time (with a trailing "Z") at construction
    - note: optional human note
    """

    delta: Dict[str, Any] = field(default_factory=dict)
    timestamp: str = field(default_factory=_utc_timestamp)
    note: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the three fields as a plain dict; ``delta`` is not copied."""
        return {"delta": self.delta, "timestamp": self.timestamp, "note": self.note}
|
||||
|
||||
|
||||
class GraphOfContracts:
    """Lightweight in-process registry for contracts.

    Each name maps to a single ``{"version", "contract"}`` entry; registering
    the same name again replaces the previous entry. Contracts are shallow
    copied on the way in and on the way out, so rebinding top-level keys on a
    caller-held dict does not alter what the registry holds. Nested values
    are still shared.
    """

    def __init__(self) -> None:
        """Create an empty registry."""
        self._contracts: Dict[str, Dict[str, Any]] = {}

    def register_contract(self, name: str, contract: Dict[str, Any], version: str = "1.0") -> None:
        """Store ``contract`` under ``name`` with the given ``version``.

        A shallow copy is stored so later mutation of the caller's dict does
        not leak into the registry, matching the copy made by get_contract().
        """
        self._contracts[name] = {"version": version, "contract": dict(contract)}

    def get_contract(self, name: str) -> Optional[Dict[str, Any]]:
        """Return ``{"version", "contract"}`` for ``name``, or None if unknown."""
        entry = self._contracts.get(name)
        if entry is None:
            return None
        # return a shallow copy to avoid external mutation
        return {"version": entry["version"], "contract": dict(entry["contract"])}
|
||||
|
|
@ -8,3 +8,8 @@ version = "0.1.0"
|
|||
description = "Minimal MVP for privacy-preserving federated experiments (OpenGrowth)"
|
||||
readme = "README.md"
|
||||
license = {text = "MIT"}
|
||||
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["."]
|
||||
include = ["opengrowth_privacy_preserving_federated"]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,32 @@
|
|||
"""Site customization so tests can import the legacy trailing-underscore
package when running in editable-install contexts.

This hack loads the local opengrowth_privacy_preserving_federated_ package
so imports like `from opengrowth_privacy_preserving_federated_ import ...`
work reliably regardless of packaging discovery.
"""
|
||||
import importlib.util
|
||||
import sys
|
||||
import os
|
||||
|
||||
|
||||
def _load_underscore_package_alias():
    """Load the sibling ``opengrowth_privacy_preserving_federated_`` package by path.

    Looks for ``opengrowth_privacy_preserving_federated_/__init__.py`` next to
    this file and, if present, executes it and registers it in
    ``sys.modules`` under that name. Returns None in every case. Does nothing
    when the file is missing, no loader can be built, or the package has
    already been imported.

    Any exception raised while executing the package propagates to the
    caller after the partially initialised module is unregistered.
    """
    alias = "opengrowth_privacy_preserving_federated_"
    if alias in sys.modules:
        # Already imported through the normal machinery; executing it a
        # second time would create a duplicate module object.
        return

    # Path to the local underscore package __init__.py
    base_dir = os.path.dirname(os.path.abspath(__file__))
    init_path = os.path.join(base_dir, alias, "__init__.py")
    if not os.path.exists(init_path):
        return

    spec = importlib.util.spec_from_file_location(alias, init_path)
    if spec is None or spec.loader is None:
        return

    module = importlib.util.module_from_spec(spec)
    # Register BEFORE executing: relative imports inside the package's
    # __init__ (e.g. ``from .contracts import ...``) resolve their parent
    # through sys.modules and cannot find it otherwise.
    sys.modules[alias] = module
    try:
        spec.loader.exec_module(module)  # type: ignore
    except BaseException:
        # Do not leave a half-initialised module behind.
        sys.modules.pop(alias, None)
        raise
|
||||
|
||||
|
||||
# Run the aliasing hook once, when this module is imported. This is
# deliberately best-effort: any failure inside the hook is swallowed.
try:
    _load_underscore_package_alias()
except Exception:
    # Do not fail test startup if this heuristic cannot be applied
    pass
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
import importlib
|
||||
import sys
|
||||
import os
|
||||
|
||||
|
||||
def _load_underscore_alias():
    """Best-effort import of the local trailing-underscore package.

    Puts the parent directory of this file's directory at the front of
    ``sys.path`` (if it is not already listed), imports
    ``opengrowth_privacy_preserving_federated_`` through the regular import
    system so its relative imports resolve, and stores the result in
    ``sys.modules`` under that name. Any exception is swallowed and the
    aliasing is skipped.
    """
    package_name = "opengrowth_privacy_preserving_federated_"
    try:
        # Ensure repo root is on sys.path so local packages can be discovered
        here = os.path.dirname(__file__)
        repo_root = os.path.abspath(os.path.join(here, ".."))
        if repo_root not in sys.path:
            sys.path.insert(0, repo_root)

        sys.modules[package_name] = importlib.import_module(package_name)
    except Exception:
        # If the local package cannot be imported for any reason, skip aliasing.
        pass


_load_underscore_alias()
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
import pytest
|
||||
|
||||
from opengrowth_privacy_preserving_federated import SecureAggregator
|
||||
|
||||
|
||||
def test_mean_and_ci_basic():
    """Mean of 1..5 is 3.0 and the reported interval brackets the mean."""
    agg = SecureAggregator()
    for value in (1.0, 2.0, 3.0, 4.0, 5.0):
        agg.add_sample(value)

    res = agg.aggregate()
    mean = res["mean"]

    assert -1e-9 < mean - 3.0 < 1e-9
    assert res["ci_low"] <= mean
    assert mean <= res["ci_high"]
|
||||
|
||||
|
||||
def test_no_samples_raises():
    """Aggregating before any sample was added raises ValueError."""
    empty_aggregator = SecureAggregator()
    # Callable form of pytest.raises: invokes aggregate() and expects the error.
    pytest.raises(ValueError, empty_aggregator.aggregate)
|
||||
|
||||
|
||||
def test_non_numeric_sample_raises():
    """Adding a string sample raises TypeError."""
    aggregator = SecureAggregator()
    # Callable form of pytest.raises: calls add_sample("not-a-number").
    pytest.raises(TypeError, aggregator.add_sample, "not-a-number")
|
||||
Loading…
Reference in New Issue