diff --git a/deltatrace/dsl.py b/deltatrace/dsl.py
index 8639c66..fef9abf 100644
--- a/deltatrace/dsl.py
+++ b/deltatrace/dsl.py
@@ -77,10 +77,53 @@ class TraceGraph:
         self.edges.append((src_id, dst_id, tag))
 
     def merkle_root(self) -> str:
+        """Return a deterministic Merkle root for the trace graph.
+
+        Python's ``hash()`` may differ between runs, so each node is
+        serialized to canonical JSON (sorted keys, compact separators)
+        built from its instance attributes. The resulting byte strings
+        are hashed with SHA-256 and reduced Merkle-style to a single
+        stable root string.
+
+        Returns:
+            The root string, or ``""`` when the graph has no nodes.
+
+        Raises:
+            ValueError: If a node's attributes form a reference cycle.
+            TypeError: If a dict attribute uses keys JSON cannot encode.
+        """
         import hashlib
+        import json
+
+        def _json_default(value: object) -> object:
+            # json.dumps calls this only for values it cannot encode
+            # natively, so plain JSON values are left untouched.
+            if isinstance(value, (set, frozenset)):
+                # Set iteration order is hash-dependent; sort it.
+                return sorted(value, key=repr)
+            attrs = getattr(value, "__dict__", None)
+            if isinstance(attrs, dict):
+                return attrs
+            # Last resort; only stable if str(value) is stable.
+            return {"repr": str(value)}
+
+        def _canonicalize(obj: object) -> dict:
+            # Objects exposing __dict__ contribute their attributes;
+            # nested values are resolved lazily by _json_default.
+            if hasattr(obj, "__dict__"):
+                return dict(vars(obj))
+            # Fallback: wrap the string representation.
+            return {"repr": str(obj)}
+
         items = []
         for n in self.nodes:
-            items.append(str(hash(n)).encode())
+            serialized = json.dumps(
+                _canonicalize(n),
+                sort_keys=True,
+                separators=(",", ":"),
+                default=_json_default,
+            )
+            items.append(serialized.encode())
         if not items:
             return ""
         level = [hashlib.sha256(i).hexdigest() for i in items]
diff --git a/test.sh b/test.sh
old mode 100644
new mode 100755