diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bd5590b --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +node_modules/ +.npmrc +.env +.env.* +__tests__/ +coverage/ +.nyc_output/ +dist/ +build/ +.cache/ +*.log +.DS_Store +tmp/ +.tmp/ +__pycache__/ +*.pyc +.venv/ +venv/ +*.egg-info/ +.pytest_cache/ +READY_TO_PUBLISH diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..15fb826 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,33 @@ +# OpenGrowth Agents — Architecture & Contribution Guide + +Overview +- A lightweight, privacy-preserving federated experimentation MVP intended for startup growth insights. +- Core stack is Python-based with a clean, testable API surface suitable for gradual expansion. + +Tech Stack +- Language: Python 3.8+ +- Components: + - SchemaRegistry: stores schemas and templates for experiments and metrics + - ExperimentTemplate: lightweight representation of experiments + - Adapters: GA4Adapter, SegmentAdapter to map analytics metrics into the canonical representation + - SecureAggregator: simple, privacy-preserving aggregation (mean + 95% CI) + - CloudLedger: simple, auditable, cloud-anchored ledger simulation + - Governance: AccessControl and Policy scaffold + +Testing & Quality +- Tests are written with pytest and must pass locally before publishing +- test.sh orchestrates tests plus a packaging build check + +Running Tests +- bash test.sh + +Extending the MVP +- Add more adapters (e.g., Amplitude) with a consistent interface +- Expand governance with versioned templates and access controls +- Implement a more robust secure aggregation (secure multi-party computation or differential privacy knobs in practice) +- Build an initial REST/MQTT adapter to connect analytics stacks to the federation layer + +Contribution Rules +- Keep changes small and incrementally testable +- Add tests for any new public API +- Update AGENTS.md if the architecture evolves or new agents are introduced diff --git a/README.md 
b/README.md index b209a1a..79f3eb4 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,15 @@ -# opengrowth-privacy-preserving-federated- +# OpenGrowth Privacy-Preserving Federated (MVP) -A privacy-preserving federated platform that enables startups to run, share, and benchmark growth experiments (pricing, onboarding, activation, onboarding flow, churn reduction) without exposing raw user data. Each startup retains local metrics (CAC, \ No newline at end of file +This repository contains a minimal, self-contained Python MVP for a privacy-preserving federated growth experimentation platform. + +- Exposes a lightweight API surface used by tests: + - SchemaRegistry, ExperimentTemplate + - SecureAggregator, CloudLedger, AccessControl, Governance + - GA4Adapter, SegmentAdapter +- Includes a tiny in-repo implementation that can be extended later to integrate real adapters and secure aggregation techniques. + +Build and test +- The project uses pyproject.toml with setuptools. Use `bash test.sh` to run tests and packaging checks. + +For maintainers +- See AGENTS.md for architecture and contribution guidelines. diff --git a/opengrowth_privacy_preserving_federated_/__init__.py b/opengrowth_privacy_preserving_federated_/__init__.py new file mode 100644 index 0000000..e0463ca --- /dev/null +++ b/opengrowth_privacy_preserving_federated_/__init__.py @@ -0,0 +1,79 @@ +"""OpenGrowth Privacy-Preserving Federated (MVP) package + +Lightweight in-repo implementation used by tests. This provides a minimal +set of APIs to exercise the test suite without pulling in external dependencies. +""" +from . 
# Package-level implementations defined in
# opengrowth_privacy_preserving_federated_/__init__.py


class SecureAggregator:
    """Summarise per-participant metric dicts into per-metric statistics."""

    @staticmethod
    def aggregate(results: list) -> dict:
        """Return the mean and a 95% confidence interval for each numeric metric.

        Args:
            results: list of dicts mapping metric name to a locally computed
                value. Non-numeric values (including ``bool``) are ignored.

        Returns:
            ``{metric: {"mean": m, "ci_lower": lo, "ci_upper": hi}}`` for every
            metric with at least one numeric value; ``{}`` for empty input.
            With a single value the interval collapses onto the mean.
        """
        # Local imports: this module has no top-level stdlib imports.
        import math
        import statistics

        if not results:
            return {}

        samples = {}
        for record in results:
            for metric, value in record.items():
                # bool is a subclass of int; a True/False flag is not a
                # measurement and must not be averaged.
                if isinstance(value, bool) or not isinstance(value, (int, float)):
                    continue
                samples.setdefault(metric, []).append(value)

        summary = {}
        for metric, values in samples.items():
            count = len(values)
            mean = sum(values) / count
            if count < 2:
                # A spread cannot be estimated from one observation.
                margin = 0.0
            else:
                # Normal-approximation interval: 1.96 standard errors, with
                # the standard error taken from the sample standard deviation.
                margin = 1.96 * statistics.stdev(values) / math.sqrt(count)
            summary[metric] = {
                "mean": mean,
                "ci_lower": mean - margin,
                "ci_upper": mean + margin,
            }
        return summary


class CloudLedger:
    """Remember the most recent anchor hash in class-level state."""

    # Shared by every caller because it lives on the class, not an instance.
    _last_anchor = None

    @classmethod
    def anchor(cls, payload: dict) -> str:
        """Hash ``payload`` and record the digest as the latest anchor.

        The payload is serialised as JSON with sorted keys, so dicts that
        differ only in key order produce the same hex SHA-256 digest.
        """
        import hashlib
        import json

        canonical = json.dumps(payload, sort_keys=True).encode()
        digest = hashlib.sha256(canonical).hexdigest()
        cls._last_anchor = digest
        return digest

    @classmethod
    def latest(cls) -> dict:
        """Return ``{"anchor_id": <last digest or None>}``."""
        return {"anchor_id": cls._last_anchor}


class AccessControl:
    """Track which roles each user has been granted."""

    def __init__(self):
        # user -> set of role names
        self._roles = {}

    def grant(self, user: str, role: str) -> None:
        """Add ``role`` to the roles held by ``user``."""
        self._roles.setdefault(user, set()).add(role)

    def has_role(self, user: str, role: str) -> bool:
        """Return True when ``user`` has been granted ``role``."""
        return role in self._roles.get(user, set())


class Governance:
    """In-memory registry of named policies."""

    def __init__(self):
        self._policies = {}

    def register_policy(self, name: str, policy: dict) -> None:
        """Store ``policy`` under ``name``, replacing any previous entry."""
        self._policies[name] = policy

    def get_policy(self, name: str) -> dict:
        """Return the policy stored under ``name``, or ``{}`` if unknown."""
        return self._policies.get(name, {})


class GA4Adapter:
    """Pass-through adapter: metrics are copied without renaming."""

    def fill(self, metrics: dict) -> dict:
        """Return a shallow copy of ``metrics``."""
        return dict(metrics)


class SegmentAdapter:
    """Pass-through adapter: metrics are copied without renaming."""

    def fill(self, metrics: dict) -> dict:
        """Return a shallow copy of ``metrics``."""
        return dict(metrics)
# File: opengrowth_privacy_preserving_federated_/adapters/ga4.py
class GA4Adapter:
    """Rename source metrics to the canonical metric names.

    ``mapping`` maps canonical name -> source key. Only source keys that are
    strings and present in the input are copied by :meth:`fill`.
    """

    # Canonical name -> source key used when no mapping is supplied.
    DEFAULT_MAPPING = {
        "activation_rate": "activation_rate",
        "funnel_dropoff": "funnel_dropoff",
        "time_to_value": "time_to_value",
        "CAC": "cac",
        "LTV": "ltv",
    }

    def __init__(self, mapping=None):
        """Create the adapter.

        Args:
            mapping: optional canonical-name -> source-key mapping. ``None``
                selects the defaults; an explicit empty mapping is honoured
                (nothing is mapped) rather than replaced by the defaults.
        """
        chosen = self.DEFAULT_MAPPING if mapping is None else mapping
        # Copy so later mutation of the caller's dict (or of the class-level
        # defaults) cannot change this adapter's behaviour.
        self.mapping = dict(chosen)

    def fill(self, source_metrics: dict) -> dict:
        """Return the canonical-name view of ``source_metrics``.

        Mapping entries whose source key is not a string, or is missing from
        ``source_metrics``, are skipped.
        """
        return {
            std_key: source_metrics[src_key]
            for std_key, src_key in self.mapping.items()
            if isinstance(src_key, str) and src_key in source_metrics
        }


# File: opengrowth_privacy_preserving_federated_/adapters/segment.py
class SegmentAdapter:
    """Rename source metrics to the canonical metric names.

    ``mapping`` maps canonical name -> source key. Only source keys that are
    strings and present in the input are copied by :meth:`fill`.
    """

    # Canonical name -> source key used when no mapping is supplied.
    DEFAULT_MAPPING = {
        "activation_rate": "activation_rate",
        "funnel_dropoff": "funnel_dropoff",
        "time_to_value": "time_to_value",
        "CAC": "cac",
        "LTV": "ltv",
    }

    def __init__(self, mapping=None):
        """Create the adapter.

        Args:
            mapping: optional canonical-name -> source-key mapping. ``None``
                selects the defaults; an explicit empty mapping is honoured
                (nothing is mapped) rather than replaced by the defaults.
        """
        chosen = self.DEFAULT_MAPPING if mapping is None else mapping
        # Copy so later mutation of the caller's dict (or of the class-level
        # defaults) cannot change this adapter's behaviour.
        self.mapping = dict(chosen)

    def fill(self, source_metrics: dict) -> dict:
        """Return the canonical-name view of ``source_metrics``.

        Mapping entries whose source key is not a string, or is missing from
        ``source_metrics``, are skipped.
        """
        return {
            std_key: source_metrics[src_key]
            for std_key, src_key in self.mapping.items()
            if isinstance(src_key, str) and src_key in source_metrics
        }
# File: opengrowth_privacy_preserving_federated_/governance.py
class AccessControl:
    """Track which roles each user has been granted."""

    def __init__(self):
        # user_id -> set of role names
        self._roles = {}

    def grant(self, user_id: str, role: str) -> None:
        """Add ``role`` to the roles held by ``user_id``.

        Roles accumulate: granting a second role does not revoke the first.
        """
        self._roles.setdefault(user_id, set()).add(role)

    def has_role(self, user_id: str, role: str) -> bool:
        """Return True when ``user_id`` has been granted ``role``."""
        return role in self._roles.get(user_id, ())


class Governance:
    """In-memory registry of named policies."""

    def __init__(self):
        self.policies = {}

    def register_policy(self, name: str, policy: dict) -> None:
        """Store ``policy`` under ``name``, replacing any previous entry."""
        self.policies[name] = policy

    def get_policy(self, name: str):
        """Return the policy stored under ``name``, or ``None`` if unknown."""
        return self.policies.get(name)


# File: opengrowth_privacy_preserving_federated_/ledger.py
import copy
import hashlib
import json


class CloudLedger:
    """Append-only list of anchored records kept in class-level state."""

    # Shared by every caller because it lives on the class, not an instance.
    _blocks = []

    @classmethod
    def anchor(cls, data: dict) -> str:
        """Hash ``data``, append a record for it, and return the hash.

        The hash is the hex SHA-256 digest of ``data`` serialised as JSON with
        sorted keys. A deep copy of ``data`` is stored so that later mutation
        by the caller cannot make the record disagree with its ``anchor_id``.
        """
        payload = json.dumps(data, sort_keys=True).encode("utf-8")
        anchor_id = hashlib.sha256(payload).hexdigest()
        cls._blocks.append({"anchor_id": anchor_id, "data": copy.deepcopy(data)})
        return anchor_id

    @classmethod
    def latest(cls):
        """Return a copy of the most recent record, or ``None`` if empty.

        A copy is returned so callers cannot alter the stored record.
        """
        if not cls._blocks:
            return None
        return copy.deepcopy(cls._blocks[-1])
# File: opengrowth_privacy_preserving_federated_/schema_registry.py
class ExperimentTemplate:
    """Plain record describing an experiment template."""

    def __init__(self, template_id: str, name: str, definition: dict):
        self.template_id = template_id
        self.name = name
        self.definition = definition

    def to_dict(self) -> dict:
        """Return the template's three fields as a dict.

        ``definition`` is returned by reference, not copied.
        """
        return {
            "template_id": self.template_id,
            "name": self.name,
            "definition": self.definition,
        }


# File: opengrowth_privacy_preserving_federated_/secure_aggregation.py
import math
import statistics


class SecureAggregator:
    """Summarise per-participant metric dicts into per-metric statistics."""

    @staticmethod
    def aggregate(local_results: list) -> dict:
        """Return the mean and a 95% confidence interval for each numeric metric.

        Args:
            local_results: list of dicts mapping metric name to a locally
                computed value. Non-numeric values (including ``bool``) are
                ignored.

        Returns:
            ``{metric: {"mean": m, "ci_lower": lo, "ci_upper": hi}}`` for every
            metric with at least one numeric value; ``{}`` for empty input.
            With a single value the interval collapses onto the mean.
        """
        if not local_results:
            return {}

        samples = {}
        for record in local_results:
            for metric, value in record.items():
                # bool is a subclass of int; a True/False flag is not a
                # measurement and must not be averaged.
                if isinstance(value, bool) or not isinstance(value, (int, float)):
                    continue
                samples.setdefault(metric, []).append(value)

        aggregated = {}
        for metric, values in samples.items():
            n = len(values)
            mean = sum(values) / n
            if n < 2:
                # A spread cannot be estimated from one observation.
                margin = 0.0
            else:
                # The standard error of the mean is estimated from the sample
                # standard deviation (n - 1 denominator); the population form
                # understates the interval width for small n.
                margin = 1.96 * statistics.stdev(values) / math.sqrt(n)
            aggregated[metric] = {
                "mean": mean,
                "ci_lower": mean - margin,
                "ci_upper": mean + margin,
            }
        return aggregated
#!/usr/bin/env bash
# Install the package, run the test suite, then check that the package builds.
# Any failing step aborts the script (set -e), so the final message is only
# printed when every step succeeded.
set -euo pipefail

# "pip install -e ." and "python3 -m build" act on the current directory, so
# run from the directory that contains this script.
cd "$(dirname "${BASH_SOURCE[0]}")"

echo "Installing package in editable mode..."
python3 -m pip install -e .

# pyproject.toml declares no dependencies, so the tools used below are not
# pulled in by the editable install and must be installed explicitly.
echo "Installing test and build tooling..."
python3 -m pip install pytest build

echo "Running pytest..."
# Invoke pytest through python3 so it runs under the same interpreter the
# package was just installed into.
python3 -m pytest -q

echo "Building package (python -m build)..."
python3 -m build

echo "All tests passed and build succeeded."
def test_governance_basic():
    """A granted role is reported, and a registered policy is returned intact."""
    access = AccessControl()
    access.grant("alice", "admin")
    granted = access.has_role("alice", "admin")
    assert granted

    governance = Governance()
    governance.register_policy("template_access", {"roles": ["admin", "viewer"]})
    stored_roles = governance.get_policy("template_access")["roles"]
    assert stored_roles == ["admin", "viewer"]