diff --git a/README.md b/README.md index 9e5e26b..a0cfe67 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,38 @@ -# OpenGrowth Privacy-Preserving Federated (MVP) +OpenGrowth Privacy-Preserving Federated Growth (MVP) -This repository contains a minimal, self-contained Python MVP for a privacy-preserving federated growth experimentation platform. +Overview +- A privacy-preserving federated platform enabling startups to run, share, and benchmark growth experiments + (pricing, onboarding, activation, funnel optimization) without exposing raw user data. +- Local metrics are retained by each startup; secure aggregation yields aggregated results with confidence intervals. +- Data contracts, a schema registry, and a lightweight Graph-of-Contracts enable governance and cross-startup benchmarking. -- New: DSL sketch and bridge for CatOpt-inspired federation primitives - - opengrowth_privacy_preserving_federated_/dsl.py provides LocalExperiment, SharedSignal, and PlanDelta data models as a minimal DSL surface. - - opengrowth_privacy_preserving_federated_/adapters/dsl_bridge.py offers a small bridge to convert local metrics via existing adapters (GA4Adapter, SegmentAdapter) into a canonical representation for federation. +Core MVP Components (visible in this repo) +- opengrowth_privacy_preserving_federated_ + - SchemaRegistry / ExperimentTemplate (minimal MVP) with tests validating basic behavior. +- opengrowth_privacy_preserving_federated_/contracts.py + - LocalExperiment, SharedSignals, PlanDelta: lightweight DSL primitives for local experiments and cross-startup signals. + - GraphOfContracts: tiny in-process registry for versioned contracts. +- opengrowth_privacy_preserving_federated_/__init__.py + - Exposes core MVP primitives and the new contract primitives for iterative federation development. 
-- Exposes a lightweight API surface used by tests: - - SchemaRegistry, ExperimentTemplate - - SecureAggregator, CloudLedger, AccessControl, Governance - - GA4Adapter, SegmentAdapter -- Includes a tiny in-repo implementation that can be extended later to integrate real adapters and secure aggregation techniques. +How to run tests and build locally +- Install in editable mode and run tests: + - bash test.sh +- This ensures packaging works and the pytest suite passes. -Build and test -- The project uses pyproject.toml with setuptools. Use `bash test.sh` to run tests and packaging checks. +Extending OpenGrowth (Recommended next steps) +- Implement a REST/MQTT adapter layer to connect to common analytics stacks (GA4, Segment, Amplitude) and CRM pipelines. +- Expand the governance layer with versioned templates, access controls, and audit logs. +- Implement a real secure aggregation backend (e.g., ADMM, DP knobs) and a cloud-anchored ledger for reproducibility. +- Build a small developer-focused DSL sketch and toy adapters to bootstrap cross-startup federation (CatOpt-like bridge). -For maintainers -- See AGENTS.md for architecture and contribution guidelines. +Notes +- This repository is designed as a stepping-stone toward a production-grade platform. The MVP is intentionally small but designed for extension with minimal surface area impact. +- The test suite validates the core MVP contracts and aggregation primitives; ongoing work should extend tests for new features. + +For contributors +- Keep changes small and well-scoped; add tests for any public API you introduce. +- Update AGENTS.md if architecture or contribution rules evolve. + +Ready for publish marker +- After validating all requirements against the Original Idea Description, create an empty READY_TO_PUBLISH file at repo root (this file is created by the final publish step). 
class SecureAggregator:
    """Accumulate numeric samples and report their mean with a confidence interval.

    Samples are kept in an in-memory list on the instance; this class does not
    transmit them anywhere.

    The interval is ``mean +/- critical_value * standard_error``:

    - For fewer than ``_NORMAL_APPROX_MIN_N`` samples the critical value is the
      exact two-sided Student-t quantile with ``df = n - 1``.
    - From ``_NORMAL_APPROX_MIN_N`` samples upward the normal quantile is used.
    - With a single sample the interval collapses onto the mean.

    The ``confidence`` passed to the constructor selects the coverage level
    (0.95 by default).
    """

    # Sample count at and above which the normal quantile replaces Student's t.
    _NORMAL_APPROX_MIN_N = 30

    def __init__(self, confidence: float = 0.95):
        """Create an empty aggregator.

        Args:
            confidence: Two-sided coverage of the interval, strictly between
                0 and 1.

        Raises:
            ValueError: If ``confidence`` is not strictly between 0 and 1.
        """
        if not (0 < confidence < 1):
            raise ValueError("confidence must be between 0 and 1")
        self.confidence = confidence
        self._samples: List[float] = []

    def add_sample(self, value: float) -> None:
        """Record one sample, stored as a float.

        Raises:
            TypeError: If ``value`` is not an ``int`` or ``float``.
        """
        if not isinstance(value, (int, float)):
            raise TypeError("sample value must be numeric")
        self._samples.append(float(value))

    def clear(self) -> None:
        """Discard every recorded sample."""
        self._samples.clear()

    def _mean(self) -> float:
        """Return the arithmetic mean; raise ``ValueError`` when empty."""
        if not self._samples:
            raise ValueError("no samples available to compute mean")
        return sum(self._samples) / len(self._samples)

    def _std_err(self) -> float:
        """Return the standard error of the mean (0.0 for fewer than 2 samples)."""
        n = len(self._samples)
        if n < 2:
            return 0.0
        mean = self._mean()
        # Unbiased sample variance: divide by n - 1, not n.
        var = sum((x - mean) ** 2 for x in self._samples) / (n - 1)
        return sqrt(var) / sqrt(n)

    @staticmethod
    def _t_coverage(t: float, df: int) -> float:
        """Return P(|T| <= t) for Student's t with integer ``df`` >= 1.

        Uses the finite trigonometric series for integer degrees of freedom
        (Abramowitz & Stegun 26.7.3 / 26.7.4), so no external library is needed.
        """
        from math import atan, cos, pi, sin

        theta = atan(t / sqrt(df))
        cos_sq = cos(theta) ** 2
        odd = df % 2 == 1
        # Odd df: cos + (2/3)cos^3 + (2*4)/(3*5)cos^5 + ... up to cos^(df-2).
        # Even df: 1 + (1/2)cos^2 + (1*3)/(2*4)cos^4 + ... up to cos^(df-2).
        # Both series share the same term-to-term ratio.
        term = cos(theta) if odd else 1.0
        power = 1 if odd else 0
        series = 0.0
        while power <= df - 2:
            series += term
            term *= cos_sq * (power + 1) / (power + 2)
            power += 2
        if odd:
            return (2.0 / pi) * (theta + sin(theta) * series)
        return sin(theta) * series

    @staticmethod
    def _solve_increasing(coverage, target: float) -> float:
        """Return the smallest x >= 0 (to float precision) with coverage(x) >= target.

        ``coverage`` must be non-decreasing on [0, inf). The upper bracket is
        found by doubling, then narrowed by bisection.
        """
        lo, hi = 0.0, 1.0
        while coverage(hi) < target and hi < 1e300:
            lo, hi = hi, hi * 2.0
        for _ in range(200):
            mid = (lo + hi) / 2.0
            if coverage(mid) < target:
                lo = mid
            else:
                hi = mid
        # Returning the upper end errs on the conservative (wider) side.
        return hi

    def _critical_value(self, n: int) -> float:
        """Return the two-sided critical value for ``n`` samples (n >= 2)."""
        from math import erf

        if n >= self._NORMAL_APPROX_MIN_N:
            # P(|Z| <= z) = erf(z / sqrt(2)) for a standard normal Z.
            return self._solve_increasing(
                lambda z: erf(z / sqrt(2.0)), self.confidence
            )
        df = n - 1
        return self._solve_increasing(
            lambda t: self._t_coverage(t, df), self.confidence
        )

    def aggregate(self) -> dict:
        """Return the mean and confidence-interval bounds of the samples.

        Returns:
            A dict with keys ``"mean"``, ``"ci_low"`` and ``"ci_high"``, e.g.
            ``{"mean": 12.3, "ci_low": 11.1, "ci_high": 13.5}``. With exactly
            one sample both bounds equal the mean (zero-width interval).

        Raises:
            ValueError: If no samples have been added.
        """
        if not self._samples:
            raise ValueError("no samples to aggregate")
        mean = self._mean()
        n = len(self._samples)
        if n < 2:
            return {"mean": mean, "ci_low": mean, "ci_high": mean}

        half_width = self._critical_value(n) * self._std_err()
        return {"mean": mean, "ci_low": mean - half_width, "ci_high": mean + half_width}
def _utc_timestamp() -> str:
    """Return the current UTC time as an ISO-8601 string with a trailing "Z"."""
    # Function-scope import: the file-level import only brings in `datetime`.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware "now"
    # and drop tzinfo so the text keeps the original "...Z" shape instead of
    # gaining a "+00:00" suffix.
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"


@dataclass
class LocalExperiment:
    """Definition of an experiment run locally by one participant.

    Attributes:
        name: Human-friendly name of the experiment.
        variables: Mapping of controllable variables (e.g. price, onboarding steps).
        privacy_budget: Optional privacy-budget hint; ``None`` when unset.
        metadata: Optional extra information.
    """
    name: str
    variables: Dict[str, Any] = field(default_factory=dict)
    privacy_budget: Optional[Dict[str, Any]] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict (values are not copied)."""
        return {
            "name": self.name,
            "variables": self.variables,
            "privacy_budget": self.privacy_budget,
            "metadata": self.metadata,
        }


@dataclass
class SharedSignals:
    """Aggregated metrics shared after local aggregation.

    Attributes:
        signals: Mapping of metric name to aggregated value.
        provenance: Optional metadata about the sources.
    """
    signals: Dict[str, Any] = field(default_factory=dict)
    provenance: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict (values are not copied)."""
        return {"signals": self.signals, "provenance": self.provenance}


@dataclass
class PlanDelta:
    """Incremental update produced by a local experiment.

    Attributes:
        delta: Mapping of changed variables or outcomes.
        timestamp: ISO-8601 UTC timestamp ending in "Z"; defaults to the
            moment the instance is created.
        note: Optional human-readable note.
    """
    delta: Dict[str, Any] = field(default_factory=dict)
    timestamp: str = field(default_factory=_utc_timestamp)
    note: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict (values are not copied)."""
        return {"delta": self.delta, "timestamp": self.timestamp, "note": self.note}


class GraphOfContracts:
    """Lightweight in-process registry that maps a name to one contract.

    Each name holds a single entry made of a version string and the contract
    payload; registering the same name again replaces the previous entry.
    Payloads are deep-copied on the way in and on the way out, so neither the
    caller's original dict nor a returned dict can alter what is stored.
    """

    def __init__(self) -> None:
        self._contracts: Dict[str, Dict[str, Any]] = {}

    def register_contract(self, name: str, contract: Dict[str, Any], version: str = "1.0") -> None:
        """Store ``contract`` under ``name``, replacing any existing entry."""
        from copy import deepcopy

        # Snapshot the payload: later edits to the caller's dict must not leak
        # into the registry.
        self._contracts[name] = {"version": version, "contract": deepcopy(contract)}

    def get_contract(self, name: str) -> Optional[Dict[str, Any]]:
        """Return ``{"version": ..., "contract": ...}`` for ``name``, or ``None``."""
        from copy import deepcopy

        entry = self._contracts.get(name)
        if entry is None:
            return None
        # Deep copy so nested containers in the result are detached as well.
        return {"version": entry["version"], "contract": deepcopy(entry["contract"])}
def _load_underscore_package_alias():
    """Load the sibling ``opengrowth_privacy_preserving_federated_`` package from disk.

    Looks for ``opengrowth_privacy_preserving_federated_/__init__.py`` next to
    this file and, when present, executes it and registers the result in
    ``sys.modules`` under that name. Returns ``None`` in every case; when the
    file is missing or no loader can be built, nothing is registered.

    Raises:
        Whatever exception the package's ``__init__`` raises while executing;
        the ``sys.modules`` entry is restored to its previous state first.
    """
    package_name = "opengrowth_privacy_preserving_federated_"
    base_dir = os.path.dirname(os.path.abspath(__file__))
    init_path = os.path.join(base_dir, package_name, "__init__.py")
    if not os.path.exists(init_path):
        return

    spec = importlib.util.spec_from_file_location(package_name, init_path)
    if spec is None or spec.loader is None:
        return

    module = importlib.util.module_from_spec(spec)
    # Register BEFORE executing: the package's __init__ uses relative imports
    # (``from .contracts import ...``), and the import system resolves those
    # through the parent entry in sys.modules. Registering afterwards makes the
    # import machinery either fail or import the package a second time.
    previous = sys.modules.get(package_name)
    sys.modules[package_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind.
        if previous is None:
            sys.modules.pop(package_name, None)
        else:
            sys.modules[package_name] = previous
        raise
def test_mean_and_ci_basic():
    """The mean of 1..5 is 3 and the reported interval brackets it."""
    aggregator = SecureAggregator()
    for value in (1.0, 2.0, 3.0, 4.0, 5.0):
        aggregator.add_sample(value)

    summary = aggregator.aggregate()
    mean = summary["mean"]

    assert abs(mean - 3.0) < 1e-9
    assert summary["ci_low"] <= mean <= summary["ci_high"]


def test_no_samples_raises():
    """Aggregating before any sample was added raises ValueError."""
    empty = SecureAggregator()
    with pytest.raises(ValueError):
        empty.aggregate()


def test_non_numeric_sample_raises():
    """A string sample is rejected with TypeError."""
    aggregator = SecureAggregator()
    with pytest.raises(TypeError):
        aggregator.add_sample("not-a-number")