# mercurymesh-privacy-preserv.../blueprint/toy_cross_venue_pipeline.py
#
# 92 lines
# 2.5 KiB
# Python

"""
Toy cross-venue analytics pipeline.

This script demonstrates a very small cross-venue merge workflow that mirrors
the MVP idea of aggregating signals from multiple venues without sharing raw
data. The simulated feature values are fixed, so the averaged feature values
are deterministic; only the timestamps and the nonce come from the wall clock.

It is intentionally lightweight and self-contained to serve as a starter.

Run: python3 blueprint/toy_cross_venue_pipeline.py
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List
import time
@dataclass
class LocalMarketContext:
    """Identify one (venue, symbol, timeframe) combination.

    Plain mutable record with positional order ``venue_id``, ``symbol``,
    ``timeframe``. No function visible in this script constructs or reads it.
    """

    # Identifier of the venue this context belongs to.
    venue_id: str
    # Instrument symbol.
    symbol: str
    # NOTE(review): presumably a bar/window label; the expected format is not
    # established anywhere in this script -- confirm with callers.
    timeframe: str
@dataclass
class MarketSignal:
    """One venue's feature snapshot for a single symbol.

    Plain mutable record with positional order ``venue_id``, ``symbol``,
    ``timestamp``, ``features``.
    """

    # Identifier of the venue that produced the signal.
    venue_id: str
    # Instrument symbol the features describe.
    symbol: str
    # Time associated with the signal, as a float.
    timestamp: float
    # Feature name -> numeric value.
    features: Dict[str, float]
@dataclass
class AggregatedSignal:
    """Result of merging several per-venue signals into one record.

    Plain mutable record with positional order ``venue_set``,
    ``feature_vector``, ``privacy_budget_used``, ``nonce``.
    """

    # Venue identifiers that contributed to the aggregate. Typed as a list, so
    # order is kept and duplicates are not removed by this class.
    venue_set: List[str]
    # Feature name -> aggregated numeric value.
    feature_vector: Dict[str, float]
    # Privacy budget figure reported alongside the aggregate.
    privacy_budget_used: float
    # Integer tag attached to the aggregate.
    nonce: int
def simulate_two_venues() -> List[MarketSignal]:
    """Build two hard-coded signals for symbol ``"ABC"``.

    Returns:
        A two-element list: first the ``"venue-A"`` signal, then the
        ``"venue-B"`` signal. Both carry the same timestamp, taken from a
        single ``time.time()`` call, and the same three feature names.
    """
    # One clock read, shared by both signals.
    captured_at = time.time()

    per_venue_features = (
        (
            "venue-A",
            {
                "liquidity_proxy": 0.82,
                "order_flow_intensity": 0.55,
                "volatility_proxy": 1.15,
            },
        ),
        (
            "venue-B",
            {
                "liquidity_proxy": 0.78,
                "order_flow_intensity": 0.60,
                "volatility_proxy": 1.10,
            },
        ),
    )

    return [
        MarketSignal(
            venue_id=venue,
            symbol="ABC",
            timestamp=captured_at,
            features=venue_features,
        )
        for venue, venue_features in per_venue_features
    ]
def aggregate_signals(signals: List[MarketSignal]) -> AggregatedSignal:
    """Average per-venue features into a single ``AggregatedSignal``.

    Args:
        signals: Per-venue signals to merge. May be empty.

    Returns:
        For empty input, an ``AggregatedSignal`` with empty ``venue_set`` and
        ``feature_vector``, ``privacy_budget_used=0.0`` and ``nonce=0``.

        Otherwise:
        * ``venue_set`` lists each signal's ``venue_id`` in input order
          (duplicates are kept).
        * ``feature_vector`` maps every feature name seen in any signal to the
          sum of its values divided by ``len(signals)``; a signal that lacks
          a feature contributes ``0.0`` to that feature's sum. Keys appear in
          first-seen order.
        * ``privacy_budget_used`` is the minimum, over all signals, of
          ``features.get("privacy_budget", 0.0)``.
        * ``nonce`` is the value returned by ``now_epoch()``.
    """
    if not signals:
        return AggregatedSignal(venue_set=[], feature_vector={}, privacy_budget_used=0.0, nonce=0)

    # Accumulate per-feature totals in first-seen order. Collecting the names
    # in a set would make the key order of the result follow string hash
    # order, which changes between interpreter runs and made the printed
    # feature order unstable. Missing features add nothing, which is the same
    # as adding 0.0.
    totals: Dict[str, float] = {}
    for sig in signals:
        for name, value in sig.features.items():
            totals[name] = totals.get(name, 0.0) + value

    venue_count = float(len(signals))
    feature_vector = {name: total / venue_count for name, total in totals.items()}

    # NOTE(review): taking the minimum means any venue without a
    # "privacy_budget" feature forces the result to 0.0, and a
    # "privacy_budget" feature is also averaged into feature_vector above.
    # Confirm this is the intended accounting.
    privacy_budget_used = min(sig.features.get("privacy_budget", 0.0) for sig in signals)

    return AggregatedSignal(
        venue_set=[sig.venue_id for sig in signals],
        feature_vector=feature_vector,
        privacy_budget_used=privacy_budget_used,
        nonce=now_epoch(),
    )
def now_epoch() -> int:
    """Return ``time.time()`` converted to ``int`` (fractional part dropped)."""
    seconds_since_epoch = time.time()
    return int(seconds_since_epoch)
def main():
    """Simulate two venue signals, aggregate them, and print the result.

    Prints a header, the contributing venues, each aggregated feature
    formatted to four decimal places, the privacy budget figure and the nonce.
    """
    aggregated = aggregate_signals(simulate_two_venues())

    print("Toy AggregatedSignal")
    print(" venues:", aggregated.venue_set)
    print(" features:")
    for feature_name, mean_value in aggregated.feature_vector.items():
        print(f" {feature_name}: {mean_value:.4f}")
    print(" privacy_budget_used:", aggregated.privacy_budget_used)
    print(" nonce:", aggregated.nonce)
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()