# guardrail-space-verifiable-.../guardrail_space/guard.py
#
# 40 lines
# 1.9 KiB
# Python

from __future__ import annotations
import json
from typing import Dict, Any, Optional
from .contract import SafetyContract
from .policy import PolicyEngine
from .shadow_planner import ShadowPlanner
# Relative path of the JSON-lines decision log; it is resolved against the
# process's current working directory each time an entry is written.
LOG_FILE = "guard_logs.jsonl"


class GuardModule:
    """Gate a plan through a policy engine, with a shadow-planner fallback.

    ``evaluate_plan`` asks the policy engine for a verdict. An allowed plan
    is passed through unchanged; a rejected plan is handed to the shadow
    planner, and is either replaced by the planner's proposal ("modify") or
    refused outright ("veto"). Every outcome is appended to ``LOG_FILE``.
    """

    def __init__(self, contract: "SafetyContract", shadow: Optional["ShadowPlanner"] = None):
        """Bind the guard to *contract* and build its policy engine.

        Args:
            contract: Contract handed to ``PolicyEngine`` and to the shadow
                planner; its ``contract_id`` is written to every log entry.
            shadow: Planner used to propose a replacement for rejected
                plans. A default ``ShadowPlanner()`` is created when omitted.
        """
        self.contract = contract
        self.engine = PolicyEngine(contract)
        # Explicit None check: a caller-supplied planner must not be dropped
        # merely because the object happens to evaluate as falsy.
        self.shadow = shadow if shadow is not None else ShadowPlanner()

    def log_decision(self, entry: Dict[str, Any]) -> None:
        """Append *entry* to ``LOG_FILE`` as a single JSON line.

        Raises:
            TypeError: If *entry* contains a value ``json.dumps`` cannot
                serialize.
            OSError: If the log file cannot be opened or written.
        """
        with open(LOG_FILE, "a", encoding="utf-8") as f:
            f.write(json.dumps(entry) + "\n")

    def _record(self, plan: Dict[str, Any], state: Dict[str, Any], decision: str, **extra: Any) -> None:
        """Log one decision with the fields shared by every outcome."""
        self.log_decision(
            {
                "contract_id": self.contract.contract_id,
                "plan": plan,
                "state": state,
                "decision": decision,
                **extra,
            }
        )

    def evaluate_plan(self, plan: Dict[str, Any], state: Dict[str, Any]) -> Dict[str, Any]:
        """Decide whether *plan* may run in *state* and log the outcome.

        Args:
            plan: Proposed plan, passed to the policy engine and logged.
            state: Current state, passed to the policy engine and logged.

        Returns:
            One of:
            ``{"decision": "allow", "plan": plan}``,
            ``{"decision": "modify", "new_plan": ..., "reason": ...}``, or
            ``{"decision": "veto", "reason": ...}``.
        """
        result = self.engine.evaluate(plan, state)

        # NOTE(review): a result without an "allowed" key is treated as
        # allowed (fail-open). Preserved from the original; confirm that
        # this is the intended default for a safety guard.
        if result.get("allowed", True):
            self._record(plan, state, "allow")
            return {"decision": "allow", "plan": plan}

        # Rejected: ask the shadow planner for a safe replacement. Any falsy
        # proposal (None, empty dict, ...) counts as "no alternative".
        safe_plan = self.shadow.propose_safe(plan, state, self.contract)
        if safe_plan:
            self._record(plan, state, "modify", new_plan=safe_plan)
            return {
                "decision": "modify",
                "new_plan": safe_plan,
                "reason": result.get("reason", "veto_by_guard"),
            }

        self._record(plan, state, "veto")
        return {"decision": "veto", "reason": result.get("reason", "guard_veto")}