diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bd5590b --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +node_modules/ +.npmrc +.env +.env.* +__tests__/ +coverage/ +.nyc_output/ +dist/ +build/ +.cache/ +*.log +.DS_Store +tmp/ +.tmp/ +__pycache__/ +*.pyc +.venv/ +venv/ +*.egg-info/ +.pytest_cache/ +READY_TO_PUBLISH diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..0180002 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,20 @@ +# AGENTS.md + +Architecture and rules for this repository (CatOpt-Query - MVP) + +- Tech stack: Python 3.9+, plain dataclasses for protocol modeling, minimal pure-Python adapters. +- Testing: pytest-based unit tests. +- Packaging: pyproject.toml with setuptools build backend. README.md hooks into packaging. +- Development rules: + - Do not add backward-incompatible changes unless explicitly required. + - Tests must pass before publishing (test.sh will enforce this). + - Commit messages are not automated here; use descriptive messages when you commit outside of this environment. +- File layout: + - catopt_query/: library code (protocol models, core, adapters). + - tests/: unit tests. + - README.md, AGENTS.md: project docs and governance. + +- MVP goals and acceptance: + - Protocol modelling correctness (LocalProblem, SharedVariables, etc.). + - Canonical representation and adapter mapping exist and are testable. + - Packaging builds and tests pass. diff --git a/README.md b/README.md index 8bade05..b17262e 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,23 @@ -# catopt-query-category-theoretic-composit +CatOpt-Query: MVP scaffolding for a category-theoretic distributed query planning framework. 
-Problem space: Distributed databases across shards and heterogeneous storage backends (relational, document, time-series) struggle with cross-shard queries, join planning, and pushdown optimization under varying data locality and bandwidth constraint \ No newline at end of file +Overview +- A minimal, production-oriented Python MVP that models local shard plans (Objects), inter-shard signals (Morphisms), and vendor adapters (Functors). +- Includes a canonical representation, two starter adapters (PostgreSQL and MongoDB), and a simple coordinator to fuse local plans. +- Designed as a stepping stone toward the 8–12 week MVP plan described in the project proposal. + +What you’ll find here +- Core data models for LocalProblem, SharedVariables, DualVariables, PlanDelta, DataContract, and AuditLog. +- A CanonicalPlan representation and a naive joint-planning coordinator. +- Adapters for PostgreSQL and MongoDB that map local plans to the canonical representation. +- Tests validating protocol serialization, adapter mappings, and basic joint planning behavior. +- A lightweight protocol registry (Graph-of-Contracts concept) skeleton and a small DSL skeleton (data classes-only). + +Getting started +- Install: python3 -m build && pip install dist/catopt_query-0.1.0-py3-none-any.whl +- Run tests: pytest -q + +Notes +- This is an MVP scaffold. It focuses on correctness, testability, and incremental extensibility for the larger CatOpt-Query project. +- No external DB calls are required for the MVP tests; adapters simulate plan mapping. 
def map_mongo_to_canonical(local: LocalProblem) -> CanonicalPlan:
    """Translate a MongoDB-style ``LocalProblem`` into a ``CanonicalPlan``.

    No MongoDB-specific translation is performed here: the problem is handed
    unchanged to ``map_local_to_canonical`` and that function's result is
    returned as-is.
    """
    canonical_plan = map_local_to_canonical(local)
    return canonical_plan
@dataclass
class CanonicalPlan:
    """Backend-neutral plan: a list of operation dicts plus an aggregate cost.

    The builders in this module fill ``plan_id`` with a random UUID4 string.
    """

    plan_id: str
    # One dict per operation; the builders below emit one operation per shard.
    operations: List[Dict[str, Any]] = field(default_factory=list)
    total_cost: float = 0.0
    # Free-form metadata written by whichever builder produced the plan.
    details: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the plan as a plain dict built by ``dataclasses.asdict``."""
        return asdict(self)


def map_local_to_canonical(local: LocalProblem) -> CanonicalPlan:
    """Convert one local shard problem into a single-operation canonical plan.

    Args:
        local: Object exposing ``shard_id``, ``predicates``,
            ``projected_attributes``, ``costs`` and ``constraints``.

    Returns:
        A ``CanonicalPlan`` with a fresh UUID4 ``plan_id``, exactly one
        operation describing the shard, ``total_cost`` equal to the sum of
        ``local.costs`` values (``0.0`` when ``costs`` is empty or ``None``),
        and ``details`` holding the shard id under ``"shard_alias"``.
    """
    # Function-scope imports: these modules are not imported at file level.
    import copy
    import math

    # Deep-copy the mutable inputs so that editing the canonical plan can never
    # silently rewrite the LocalProblem it was derived from (and vice versa).
    operation = {
        "shard": local.shard_id,
        "predicates": copy.deepcopy(local.predicates),
        "attributes": copy.deepcopy(local.projected_attributes),
        "costs": copy.deepcopy(local.costs),
        "constraints": copy.deepcopy(local.constraints),
    }
    # fsum always yields a float and avoids accumulated rounding error.
    total = math.fsum(local.costs.values()) if local.costs else 0.0
    return CanonicalPlan(
        plan_id=str(uuid.uuid4()),
        operations=[operation],
        total_cost=total,
        details={"shard_alias": local.shard_id},
    )


def aggregate_joint_plan(local_plans: List[CanonicalPlan]) -> CanonicalPlan:
    """Fuse several canonical plans into one by naive aggregation.

    Operations are concatenated in input order and costs are summed. The
    operation dicts themselves are shared with the input plans, not copied.

    Args:
        local_plans: Plans to combine; any iterable is accepted, including an
            empty one.

    Returns:
        A new ``CanonicalPlan`` with a fresh UUID4 ``plan_id`` whose
        ``details`` records the number of inputs under ``"source_count"``.
    """
    import math

    # Materialise once so generators work and can be both iterated and counted.
    plans = list(local_plans)
    all_ops: List[Dict[str, Any]] = []
    for plan in plans:
        all_ops.extend(plan.operations)
    # fsum keeps the total exact where repeated ``+=`` would drift
    # (e.g. ten costs of 0.1 sum to 1.0 rather than 0.9999999999999999).
    total = math.fsum(plan.total_cost for plan in plans)
    return CanonicalPlan(
        plan_id=str(uuid.uuid4()),
        operations=all_ops,
        total_cost=total,
        details={"source_count": len(plans)},
    )
@dataclass
class LocalProblem:
    """Description of one shard's local planning problem."""

    shard_id: str
    projected_attributes: List[str]
    predicates: List[str]
    # Cost component name -> value; summed by the canonical mapping in core.
    costs: Dict[str, float]
    constraints: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict built by ``dataclasses.asdict``."""
        return asdict(self)


@dataclass
class SharedVariables:
    """Versioned, free-form payload container."""

    version: int
    payload: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict built by ``dataclasses.asdict``."""
        return asdict(self)


@dataclass
class DualVariables:
    """Versioned, free-form payload container (same shape as SharedVariables)."""

    version: int
    payload: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict built by ``dataclasses.asdict``."""
        return asdict(self)


@dataclass
class PlanDelta:
    """Identified, timestamped set of changes."""

    delta_id: str
    # NOTE(review): presumably seconds since the epoch -- confirm with callers.
    timestamp: float
    changes: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict built by ``dataclasses.asdict``."""
        return asdict(self)


@dataclass
class DataContract:
    """Schemas and encryption rules, each held as a free-form mapping."""

    schemas: Dict[str, Any] = field(default_factory=dict)
    encryption_rules: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict built by ``dataclasses.asdict``."""
        return asdict(self)


@dataclass
class AuditLog:
    """List of textual log entries."""

    entries: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict built by ``dataclasses.asdict``."""
        return asdict(self)
def test_postgres_adapter_maps_to_canonical():
    """The PostgreSQL adapter returns a CanonicalPlan carrying the problem's cost."""
    problem = LocalProblem(
        shard_id="pg-1",
        projected_attributes=["a"],
        predicates=["a > 1"],
        costs={"cpu": 1.0},
    )

    plan = map_postgres_to_canonical(problem)

    assert isinstance(plan, CanonicalPlan)
    assert plan.total_cost == 1.0


def test_mongo_adapter_maps_to_canonical():
    """The MongoDB adapter returns a CanonicalPlan carrying the problem's cost."""
    problem = LocalProblem(
        shard_id="mongo-1",
        projected_attributes=["b"],
        predicates=["b != NULL"],
        costs={"io": 0.2},
    )

    plan = map_mongo_to_canonical(problem)

    assert isinstance(plan, CanonicalPlan)
    assert plan.total_cost == 0.2
def test_local_to_canonical_mapping():
    """Mapping one LocalProblem yields a single-operation plan with its cost."""
    problem = LocalProblem(
        shard_id="s1",
        projected_attributes=["x"],
        predicates=["x IS NOT NULL"],
        costs={"cpu": 2.0},
    )

    plan = map_local_to_canonical(problem)

    assert isinstance(plan, CanonicalPlan)
    assert plan.total_cost == 2.0
    assert len(plan.operations) == 1