diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bd5590b --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +node_modules/ +.npmrc +.env +.env.* +__tests__/ +coverage/ +.nyc_output/ +dist/ +build/ +.cache/ +*.log +.DS_Store +tmp/ +.tmp/ +__pycache__/ +*.pyc +.venv/ +venv/ +*.egg-info/ +.pytest_cache/ +READY_TO_PUBLISH diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..d63d57b --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,12 @@ +# Architecture and Contribution Guide + +- Language: Python 3.8+ +- Core Tech Stack: pure Python (standard library only) for numeric computations; a minimal federated learning abstraction. +- Key Components: + - Client: local dataset, performs simple gradient descent to update weights, caches updates when offline. + - Server: aggregates client weight deltas with optional DP-noise, updates global model. +- Testing: + - Run tests via ./test.sh which executes pytest and validates packaging with python -m build. +- How to contribute: + - Implement new privacy-preserving aggregation strategies or richer client models. + - Extend tests to cover offline caching and connectivity scenarios. diff --git a/README.md b/README.md index 799ac12..edd641d 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,15 @@ -# interplanetary-edge-orchestrator-privacy +# Interplanetary Edge Orchestrator: Privacy-Preserving Federated Optimization -Problem: Space habitats and deep-space missions rely on fleets of autonomous robots (rover, aerial drones, maintenance bots) and stationary modules that must operate with intermittent or no connectivity. Centralized planning is infeasible due to late \ No newline at end of file +This repository contains a minimal, working Python simulation of a privacy-preserving +federated optimization layer designed for fleets of robots operating with offline-first +connectivity in space habitats. It demonstrates a simple, DP-friendly aggregation of local +updates from multiple clients to form a global model.
+ +Usage highlights: +- Lightweight Client and Server implemented in Python. +- Local data training using gradient descent for linear regression. +- Privacy-preserving flavor via optional noise on aggregated updates. +- Offline-first capability via local update caching (non-connected clients save updates to disk). + +How to run tests: +- This repository provides a test script, test.sh, at the repository root. diff --git a/interplanetary_edge_orchestrator_privacy/__init__.py b/interplanetary_edge_orchestrator_privacy/__init__.py new file mode 100644 index 0000000..c06511d --- /dev/null +++ b/interplanetary_edge_orchestrator_privacy/__init__.py @@ -0,0 +1,8 @@ +"""Interplanetary Edge Orchestrator: Privacy-Preserving Federated Optimization +This package provides a lightweight simulation of a federated learning +orchestrator suitable for offline-first operation in space habitats. +""" + +from .federated import Client, Server + +__all__ = ["Client", "Server"] diff --git a/interplanetary_edge_orchestrator_privacy/federated.py b/interplanetary_edge_orchestrator_privacy/federated.py new file mode 100644 index 0000000..e0718c1 --- /dev/null +++ b/interplanetary_edge_orchestrator_privacy/federated.py @@ -0,0 +1,90 @@ +"""Privacy-Preserving Federated Optimization (Minimal Pure-Python). + +This version eliminates the NumPy dependency by operating on plain Python lists. +It provides the same API used by the tests and the minimal demo: +- Clients train a simple linear regression model using gradient descent on lists. +- The Server aggregates deltas, with optional additive Gaussian noise for DP. +- Online/Offline (offline-first) behavior is supported via simple pickle-based caching.
+""" + +from typing import List, Optional +import os +import pickle +import random + + +class Client: + def __init__(self, client_id: int, data_X, data_y, connected: bool = True, cache_dir: str = "cache"): + self.client_id = client_id + self.X = [list(row) for row in data_X] # 2D list: n_samples x n_features + self.y = list(data_y) # 1D list: n_samples + self.connected = connected + self.cache_dir = cache_dir + self.w = None # local weights as 1D list + + def initialize(self, n_features: int): + if self.w is None: + self.w = [0.0 for _ in range(n_features)] + return self.w + + def train(self, w: List[float], lr: float = 0.01, epochs: int = 5) -> List[float]: + if self.w is None: + self.initialize(len(w)) + n_samples = len(self.y) + n_features = len(self.w) + for _ in range(epochs): + # predictions + pred = [sum(self.X[i][k] * self.w[k] for k in range(n_features)) for i in range(n_samples)] + residual = [pred[i] - self.y[i] for i in range(n_samples)] + # gradient + grad = [sum(self.X[i][j] * residual[i] for i in range(n_samples)) / n_samples for j in range(n_features)] + # update local weights + self.w = [self.w[j] - lr * grad[j] for j in range(n_features)] + update = [self.w[j] - w[j] for j in range(n_features)] + if not self.connected: + self._save_update(update) + return update + + def _cache_path(self) -> str: + os.makedirs(self.cache_dir, exist_ok=True) + return os.path.join(self.cache_dir, f"client_{self.client_id}_update.pkl") + + def _save_update(self, update: List[float]): + path = self._cache_path() + with open(path, 'wb') as f: + pickle.dump(update, f) + + def load_update(self) -> List[float]: + path = self._cache_path() + if not os.path.exists(path): + raise FileNotFoundError(path) + with open(path, 'rb') as f: + return pickle.load(f) + + +class Server: + def __init__(self, n_features: int, initial_weights: Optional[List[float]] = None): + self.n_features = int(n_features) + if initial_weights is None: + self.w = [0.0 for _ in range(self.n_features)] + 
else: + self.w = list(initial_weights) + + def aggregate(self, updates: List[List[float]], noise_scale: float = 0.0, seed: Optional[int] = None) -> List[float]: + if not updates: + return self.w + # Deterministic seed when provided for testability + rng = random.Random(seed) if seed is not None else random.Random() + # compute average delta + avg_delta = [0.0 for _ in range(self.n_features)] + for upd in updates: + for i in range(self.n_features): + avg_delta[i] += upd[i] + avg_delta = [d / len(updates) for d in avg_delta] + # add noise if requested + if noise_scale and noise_scale > 0.0: + for i in range(self.n_features): + avg_delta[i] += rng.gauss(0.0, noise_scale) + # update global model + self.w = [self.w[i] + avg_delta[i] for i in range(self.n_features)] + return self.w diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a9f00cf --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,16 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "interplanetary-edge-orchestrator-privacy" +version = "0.1.0" +description = "Privacy-preserving federated optimization for robotic fleets in space habitats (offline-first)." 
+readme = "README.md" +requires-python = ">=3.8" + +[project.urls] +Homepage = "https://example.com/interplanetary-edge-orchestrator-privacy" + +[tool.setuptools.packages.find] +where = ["."] diff --git a/test.sh b/test.sh new file mode 100644 index 0000000..3e5ab2c --- /dev/null +++ b/test.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Run unit tests +pytest -q + +# Build the package to verify packaging metadata and directory structure +python3 -m build diff --git a/tests/test_federated.py b/tests/test_federated.py new file mode 100644 index 0000000..d8412bd --- /dev/null +++ b/tests/test_federated.py @@ -0,0 +1,54 @@ +import os +import sys +# Ensure repository root is on sys.path for package import during tests +ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +if ROOT not in sys.path: + sys.path.insert(0, ROOT) +from interplanetary_edge_orchestrator_privacy import Client, Server +import random +def generate_dataset(n_samples: int, n_features: int, seed: int = 0): + rng = random.Random(seed) + X = [[rng.gauss(0.0, 1.0) for _ in range(n_features)] for _ in range(n_samples)] + true_w = [rng.gauss(0.0, 1.0) for _ in range(n_features)] + y = [sum(X[i][k] * true_w[k] for k in range(n_features)) + rng.gauss(0.0, 0.1) for i in range(n_samples)] + return X, y, true_w + + +def test_basic_federated_aggregation_improves_model(): + random.seed(0) + n_features = 2 + n_clients = 3 + clients = [] + for i in range(n_clients): + X, y, _ = generate_dataset(30, n_features, seed=i+1) + c = Client(client_id=i, data_X=X, data_y=y, connected=True) + clients.append(c) + server = Server(n_features) + + # Initial global weights (zeros) + w_init = server.w.copy() + # Each client trains locally and returns its update (delta) + updates = [] + for c in clients: + c.initialize(n_features) + update = c.train(server.w, lr=0.01, epochs=20) + updates.append(update) + + # Aggregate updates on the server (no DP noise for determinism in test) + 
server.aggregate(updates, noise_scale=0.0, seed=123) + + # Sanity: server weights should have moved away from initial zeros + assert isinstance(server.w, list) and len(server.w) == n_features + assert any(abs(server.w[i] - w_init[i]) > 1e-9 for i in range(n_features)) + + # Optional sanity: compute average loss reduction on clients after aggregation + total_initial_loss = 0.0 + total_final_loss = 0.0 + for c in clients: + # initial loss with zeros (predictions are zeros for all samples) + pred0 = [0.0 for _ in range(len(c.y))] + total_initial_loss += sum((pred0[i] - c.y[i]) ** 2 for i in range(len(c.y))) + # final loss with new global weights + pred1 = [sum(c.X[i][k] * server.w[k] for k in range(n_features)) for i in range(len(c.y))] + total_final_loss += sum((pred1[i] - c.y[i]) ** 2 for i in range(len(c.y))) + assert total_final_loss <= total_initial_loss + 1e-6