openbench-privacy-preservin.../openbench_privacy_preservin.../core.py

106 lines
3.5 KiB
Python

from __future__ import annotations

import json
import math
import os
import random
import uuid
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from typing import List, Optional

# Default storage directory: a "data" folder located next to this module file.
STORE_DIR = os.path.join(os.path.dirname(__file__), "data")
# Default JSONL file; LocalStore falls back to it when no path is supplied.
STORE_PATH = os.path.join(STORE_DIR, "kpi_records.jsonl")
def _utc_timestamp() -> str:
    """Return the current UTC time as a naive ISO-8601 string."""
    # datetime.utcnow() is deprecated since Python 3.12. Taking an aware
    # "now" and dropping tzinfo yields the same naive-UTC text, so stored
    # timestamps keep their format (no "+00:00" suffix).
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()


@dataclass
class KPIRecord:
    """One set of KPI values plus descriptive metadata.

    The six numeric fields are required. ``region`` and ``industry`` have
    string defaults, ``anon_id`` defaults to a fresh random 32-character hex
    string, and ``timestamp`` defaults to the creation time in UTC, formatted
    as a naive ISO-8601 string.
    """

    revenue: float
    cogs: float
    inventory_turns: float
    lead_time: float
    cac: float
    ltv: float
    region: str = "global"
    industry: str = "unknown"
    # Random identifier generated per record; not derived from any field.
    anon_id: str = field(default_factory=lambda: uuid.uuid4().hex)
    timestamp: str = field(default_factory=_utc_timestamp)


class LocalStore:
    """Simple offline-first store for KPI records (JSONL).

    Every record is kept as one JSON object per line in the file at ``path``.
    """

    def __init__(self, path: Optional[str] = None) -> None:
        """Remember the store location and ensure its parent directory exists.

        Args:
            path: Location of the JSONL file. ``STORE_PATH`` is used when the
                argument is omitted or empty.
        """
        self.path = path or STORE_PATH
        parent = os.path.dirname(self.path)
        # A bare filename has an empty dirname and os.makedirs("") raises
        # FileNotFoundError; the current directory never needs creating.
        if parent:
            os.makedirs(parent, exist_ok=True)

    def add_kpi(self, record: KPIRecord) -> None:
        """Append *record* to the store as a single JSON line."""
        with open(self.path, "a", encoding="utf-8") as f:
            f.write(json.dumps(asdict(record)) + "\n")

    def get_all(self) -> List[KPIRecord]:
        """Load every record from the store, in file order.

        Returns:
            The stored records; an empty list when the file does not exist.
            Blank lines are skipped.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            TypeError: If a line lacks a required ``KPIRecord`` field or
                carries a key that ``KPIRecord`` does not accept.
        """
        records: List[KPIRecord] = []
        if not os.path.exists(self.path):
            return records
        with open(self.path, "r", encoding="utf-8") as f:
            for raw_line in f:
                payload = raw_line.strip()
                if not payload:
                    continue
                # NOTE: the parsed object is passed to KPIRecord unfiltered,
                # so legacy lines with missing or extra keys are not
                # tolerated and surface as TypeError.
                records.append(KPIRecord(**json.loads(payload)))
        return records


class SecureAggregator:
    """Privacy-aware mean over one metric of a list of KPI records.

    By default ``aggregate`` returns the plain arithmetic mean. With
    ``anonymize=True`` a single Laplace(0, 1/epsilon) sample is added to it.

    Noise comes from Python's global ``random`` module, so seeding it with
    ``random.seed`` makes results reproducible in unit tests. In production a
    more robust RNG should be used.

    NOTE(review): the noise scale is ``1 / epsilon`` regardless of the
    metric's value range or the number of records; confirm the intended
    sensitivity before relying on this for a formal privacy guarantee.
    """

    @staticmethod
    def _laplace_sample(b: float) -> float:
        """Draw one sample from a zero-centred Laplace distribution of scale *b*.

        The sample is the scaled difference of two exponential variates.
        Exactly two values are consumed from the global RNG per call.
        """
        u1 = random.random()
        u2 = random.random()
        # random.random() may return exactly 0.0; the tiny offset keeps
        # math.log() finite.
        return -b * (math.log(u1 + 1e-12) - math.log(u2 + 1e-12))

    @staticmethod
    def aggregate(records: List[KPIRecord], metric: str, anonymize: bool = False, epsilon: float = 1.0) -> float:
        """Return the mean of *metric* over *records*, optionally with noise.

        Args:
            records: Records to aggregate.
            metric: Name of a ``KPIRecord`` field whose values are numeric.
            anonymize: When true, add one Laplace sample to the mean.
            epsilon: Privacy parameter; the noise scale is ``1 / epsilon``.
                Values below ``1e-9`` (including zero and negatives) are
                clamped to ``1e-9``.

        Returns:
            ``0.0`` when *records* is empty (no noise is added and *metric*
            is not validated in that case); otherwise the mean, plus noise
            when *anonymize* is set.

        Raises:
            ValueError: If *metric* is not a ``KPIRecord`` field, or if its
                values cannot be averaged (for example a string field).
        """
        if not records:
            return 0.0
        known_fields = getattr(KPIRecord, "__annotations__", {})
        if metric not in known_fields:
            raise ValueError(f"Unknown metric '{metric}' for KPIRecord.")
        values = [getattr(r, metric) for r in records]
        try:
            # records is non-empty here, so len(values) is never zero.
            mean = sum(values) / len(values)
        except TypeError as exc:
            # String-valued fields pass the name check above but cannot be
            # summed; report that as a bad metric rather than leaking the
            # low-level TypeError from sum().
            raise ValueError(
                f"Metric '{metric}' is not numeric and cannot be aggregated."
            ) from exc
        if anonymize:
            scale = 1.0 / max(epsilon, 1e-9)
            mean += SecureAggregator._laplace_sample(scale)
        return mean


class GrowthCalculator:
    """Helpers that derive growth figures from a single KPI record."""

    @staticmethod
    def roi(record: KPIRecord) -> float:
        """Return the ROI proxy ``(ltv - cac) / cac`` for *record*.

        A ``cac`` of zero yields positive infinity instead of dividing by
        zero.
        """
        if record.cac != 0:
            net_value = record.ltv - record.cac
            return net_value / record.cac
        return float('inf')

    @staticmethod
    def growth_index(record: KPIRecord) -> float:
        """Return ``(revenue + ltv)`` divided by ``cogs``, floored at 1.0.

        The divisor is never smaller than ``1.0``, so a zero, negative or
        fractional ``cogs`` leaves the numerator unscaled.
        """
        numerator = record.revenue + record.ltv
        divisor = max(1.0, record.cogs)
        return numerator / divisor


# Public names exported by a star-import of this module.
__all__ = ["KPIRecord", "LocalStore", "SecureAggregator", "GrowthCalculator"]