From c9310b1079478bf4430b0a2dfb25445d7ccc9b59 Mon Sep 17 00:00:00 2001 From: Mathias Bergqvist Date: Wed, 22 Apr 2026 19:23:07 +0200 Subject: [PATCH] fix(ingestion): always append .md extension to written filenames brain_write with a custom filename omitted the .md extension, causing search to skip the file (search.go filters on HasSuffix .md). Co-Authored-By: Claude Sonnet 4.6 --- .mcp.json | 10 + docs/multi-model-routing.md | 241 ++ .../plans/2026-04-17-hyperguild-phase1.md | 2138 +++++++++++++++++ .../plans/2026-04-19-hyperguild-phase2.md | 1871 +++++++++++++++ .../2026-04-20-model-orchestration-plan.md | 1617 +++++++++++++ .../plans/2026-04-22-phase4-attempt-wiring.md | 1073 +++++++++ ingestion/internal/api/handler.go | 6 +- 7 files changed, 6955 insertions(+), 1 deletion(-) create mode 100644 .mcp.json create mode 100644 docs/multi-model-routing.md create mode 100644 docs/superpowers/plans/2026-04-17-hyperguild-phase1.md create mode 100644 docs/superpowers/plans/2026-04-19-hyperguild-phase2.md create mode 100644 docs/superpowers/plans/2026-04-20-model-orchestration-plan.md create mode 100644 docs/superpowers/plans/2026-04-22-phase4-attempt-wiring.md diff --git a/.mcp.json b/.mcp.json new file mode 100644 index 0000000..8f1bc64 --- /dev/null +++ b/.mcp.json @@ -0,0 +1,10 @@ +{ + "mcpServers": { + "supervisor": { + "command": "/Users/mathias/dev/AI/supervisor/bin/supervisor-bridge", + "env": { + "SUPERVISOR_URL": "http://koala:30320/mcp" + } + } + } +} diff --git a/docs/multi-model-routing.md b/docs/multi-model-routing.md new file mode 100644 index 0000000..1a6f31b --- /dev/null +++ b/docs/multi-model-routing.md @@ -0,0 +1,241 @@ +# Multi-Model Routing for supervisor + +Reference document for implementing multi-model access within the supervisor project. +Researched April 2026. Constraints: Claude Max subscription (ToS must be respected). 
+ +--- + +## Goal + +Route tasks to specialized, cheaper, or local models during agent and skill flows — without +violating Anthropic's terms or introducing unnecessary infrastructure risk. + +--- + +## Hard Constraints + +- Claude Max subscription is in use. Anthropic's April 2026 terms **prohibit using the + subscription with third-party harnesses that spoof the Anthropic API surface**. +- `ANTHROPIC_BASE_URL` → LiteLLM workaround is explicitly out of scope. +- Claude must remain the reasoning engine. Other models are tools, not replacements. + +--- + +## Infrastructure Available + +| Machine | Role | Relevant services | +|---------|------|-------------------| +| koala | GPU inference | llama-swap, Ollama, Qdrant, LiteLLM proxy | +| iguana | Services, builds | k3s, general services | +| flamingo | Daily driver | Claude Code runs here | + +LiteLLM proxy on koala exposes 100+ models (local + cloud) through a unified API. +All machines connected via Tailscale. + +--- + +## Approved Patterns + +### Pattern 1 — Native Claude model tiering (zero build) + +Claude Code subagents support per-agent model selection via frontmatter. +Use this for cost routing within the Claude model family. + +```yaml +# ~/.claude/agents/explorer.md +--- +name: explorer +description: File reading, code search, codebase mapping — use for all exploration tasks +model: haiku +--- +``` + +- `haiku` for exploration, summarization, classification +- `sonnet` (default) for main reasoning and implementation +- `opus` for deep analysis, architecture decisions + +**When to use**: Always. Add `model: haiku` to any subagent that does read-heavy or +classification work. Cheapest and fastest path to cost control. + +--- + +### Pattern 2 — MCP tools wrapping local models (primary build target) + +Expose local models on koala as named MCP tools. Claude remains the orchestrator and +reasoning engine — it calls local models as tools the same way it calls any other tool. 
+ +This is the intended MCP use case and carries zero ToS risk. + +**Semantic contract**: Claude decides *when* to delegate based on the tool description. +Write descriptions that tell Claude what the model is good for. + +#### MCP server implementation + +Small Python server, run on koala or flamingo, registered in Claude Code settings. + +```python +# supervisor/scripts/mcp_local_models.py +import mcp +import requests + +server = mcp.Server("local-models") + +LITELLM_BASE = "http://koala:4000" +OLLAMA_BASE = "http://koala:11434" + +def _litellm_chat(model: str, prompt: str) -> str: + r = requests.post(f"{LITELLM_BASE}/v1/chat/completions", json={ + "model": model, + "messages": [{"role": "user", "content": prompt}], + "max_tokens": 2048, + }) + r.raise_for_status() + return r.json()["choices"][0]["message"]["content"] + + +@server.tool() +def ask_local_llama(prompt: str) -> str: + """Ask the local Llama model on koala. + Use for: bulk summarization, first-pass analysis, classification, simple Q&A, + anything that does not require deep reasoning or up-to-date knowledge. + Faster and cheaper than cloud models for routine subtasks.""" + return _litellm_chat("llama3-local", prompt) + + +@server.tool() +def ask_coding_model(code: str, question: str) -> str: + """Ask a code-specialized local model. 
+ Use for: syntax checking, boilerplate generation, code formatting questions, + simple refactors where pattern-matching is sufficient.""" + return _litellm_chat("codellama-local", f"Code:\n{code}\n\nQuestion: {question}") + + +@server.tool() +def list_available_local_models() -> list[str]: + """List all models currently available on the local LiteLLM proxy.""" + r = requests.get(f"{LITELLM_BASE}/v1/models") + r.raise_for_status() + return [m["id"] for m in r.json()["data"]] + + +if __name__ == "__main__": + mcp.run_stdio_server(server) +``` + +#### Register in Claude Code + +Add to `~/.claude/settings.json` (or project-level `.claude/settings.json`): + +```json +{ + "mcpServers": { + "local-models": { + "command": "python3", + "args": ["/path/to/supervisor/scripts/mcp_local_models.py"] + } + } +} +``` + +#### LiteLLM config additions needed on koala + +```yaml +# litellm config.yaml — add model entries for local models +model_list: + - model_name: llama3-local + litellm_params: + model: ollama/llama3.2 + api_base: http://localhost:11434 + + - model_name: codellama-local + litellm_params: + model: ollama/codellama + api_base: http://localhost:11434 +``` + +--- + +### Pattern 3 — External orchestration scripts (for pipeline workflows) + +For multi-model pipelines that don't need to live inside a Claude Code session. +These scripts use their own API key (separate from Max subscription — API billing), +so they can call Claude API + LiteLLM freely. + +Claude Code invokes them via the Bash tool. 
+ +``` +Claude Code → [Bash tool] → ./scripts/orchestrate.py → {Claude API, LiteLLM, local models} +``` + +```python +# supervisor/scripts/orchestrate.py +import anthropic +import requests + +claude = anthropic.Anthropic() # reads ANTHROPIC_API_KEY — separate from Max subscription + +def analyze_document(path: str) -> str: + with open(path) as f: + content = f.read() + + # Step 1: local Llama extracts structure (fast, cheap) + structure = requests.post("http://koala:4000/v1/chat/completions", json={ + "model": "llama3-local", + "messages": [{"role": "user", "content": f"Extract key sections from:\n{content}"}], + }).json()["choices"][0]["message"]["content"] + + # Step 2: Claude synthesizes and reasons over it + synthesis = claude.messages.create( + model="claude-sonnet-4-6", + max_tokens=2048, + messages=[{"role": "user", "content": f"Synthesize these findings:\n{structure}"}] + ) + return synthesis.content[0].text +``` + +**When to use**: Batch processing, automated pipelines, workflows triggered by cron or +external events. Not for interactive Claude Code sessions. + +--- + +## What to Skip + +| Approach | Why skip | +|----------|----------| +| `ANTHROPIC_BASE_URL` → LiteLLM | ToS violation with Max subscription (April 2026 terms) | +| Third-party harnesses (OpenClaw etc.) | Explicitly banned for subscription users | +| A2A in Claude Code | Not implemented by Anthropic yet — revisit late 2026 | +| OpenAI agent handoffs | Loses execution context, not worth the complexity | + +--- + +## Protocol Landscape (for awareness, not immediate action) + +- **MCP** — production, 97M monthly downloads, your primary tool-access protocol. LiteLLM + natively supports it as both MCP gateway and MCP client as of v1.60+. +- **A2A v1.0** — Google/Linux Foundation, 150+ orgs in production, but Anthropic has not + shipped it in Claude Code. The intent is agent-to-agent peer delegation (vs MCP's + agent-to-tool). Worth watching for H2 2026. 
+- **AGNTCY** — Cisco/Linux Foundation, discovery and identity layer beneath MCP+A2A. + Potentially relevant for multi-machine routing across koala/iguana/flamingo once mature. + +--- + +## Build Priority + +| Step | Effort | Value | When | +|------|--------|-------|------| +| Add `model: haiku` to explorer subagents | 10 min | Immediate cost saving | Now | +| Write MCP server for local models | 2–3h | Local model access in sessions | Soon | +| Register MCP server in Claude Code settings | 15 min | Activates pattern 2 | With above | +| Write orchestration script template | 1–2h | Pipeline workflows | When needed | + +--- + +## References + +- LiteLLM MCP docs: https://docs.litellm.ai/docs/mcp +- Community MCP wrapper for LiteLLM: https://github.com/itsDarianNgo/mcp-server-litellm +- Ollama MCP server: https://github.com/rawveg/ollama-mcp +- A2A protocol status: https://www.linuxfoundation.org/press/a2a-protocol-surpasses-150-organizations-lands-in-major-cloud-platforms-and-sees-enterprise-production-use-in-first-year +- AGNTCY: https://github.com/agntcy diff --git a/docs/superpowers/plans/2026-04-17-hyperguild-phase1.md b/docs/superpowers/plans/2026-04-17-hyperguild-phase1.md new file mode 100644 index 0000000..13ad0e4 --- /dev/null +++ b/docs/superpowers/plans/2026-04-17-hyperguild-phase1.md @@ -0,0 +1,2138 @@ +# Hyperguild Phase 1 — Foundation Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add brain access, session logging, tier detection, and a retrospective worker to the supervisor MCP server, turning it into the foundation of the hyperguild SDO. + +**Architecture:** The supervisor repo grows two new subdirectories: `ingestion/` (a separate Go HTTP server that wraps brain file I/O and text search) and `brain/` (the wiki content + session logs). 
The supervisor MCP server gains four new tool groups — brain, org, sessionlog, retrospective — that call the ingestion server internally or append to local JSONL files. The existing TDD skill handlers are updated to automatically write session log entries after each invocation. + +**Tech Stack:** Go 1.26, net/http (stdlib only), testify, JSONL for session logs, plain text search for brain queries (no Qdrant in Phase 1). + +--- + +## File Map + +**New — ingestion module:** +- `ingestion/go.mod` — separate Go module `github.com/mathiasbq/hyperguild/ingestion` +- `ingestion/cmd/server/main.go` — HTTP server entry point (:3300) +- `ingestion/internal/api/handler.go` — `/query` and `/write` handlers +- `ingestion/internal/api/handler_test.go` +- `ingestion/internal/search/search.go` — full-text search across wiki files +- `ingestion/internal/search/search_test.go` + +**New — supervisor packages:** +- `internal/tier/tier.go` — tier detection by probing endpoints +- `internal/tier/tier_test.go` +- `internal/session/session.go` — append/read JSONL session logs +- `internal/session/session_test.go` +- `internal/skills/brain/skill.go` — brain_query + brain_write MCP tools +- `internal/skills/brain/handlers.go` +- `internal/skills/brain/handlers_test.go` +- `internal/skills/org/skill.go` — tier MCP tool +- `internal/skills/org/handlers.go` +- `internal/skills/org/handlers_test.go` +- `internal/skills/sessionlog/skill.go` — session_log MCP tool +- `internal/skills/sessionlog/handlers.go` +- `internal/skills/sessionlog/handlers_test.go` +- `internal/skills/retrospective/skill.go` — retrospective MCP tool +- `internal/skills/retrospective/handlers.go` +- `internal/skills/retrospective/handlers_test.go` + +**New — config files:** +- `config/supervisor/protocols.md` +- `config/supervisor/retrospective.md` +- `brain/wiki/concepts/.gitkeep` +- `brain/wiki/entities/.gitkeep` +- `brain/wiki/sources/.gitkeep` +- `brain/raw/.gitkeep` +- `brain/sessions/.gitkeep` +- 
`brain/training-data/sft/.gitkeep` +- `brain/training-data/dpo/.gitkeep` +- `brain/training-data/rl/.gitkeep` + +**Modified:** +- `internal/skills/tdd/handlers.go` — call session_log after each phase +- `internal/config/config.go` — add IngestBaseURL, SessionsDir, BrainDir +- `cmd/supervisor/main.go` — wire new skills +- `config/models.yaml` — add retrospective model +- `Taskfile.yml` — add ingestion server tasks +- `.context/mcp.json` — update server list +- `.env.example` — add new vars + +--- + +## Task 1: ingestion/ module scaffold + +**Files:** +- Create: `ingestion/go.mod` +- Create: `ingestion/cmd/server/main.go` + +- [ ] **Step 1: Create the ingestion go.mod** + +``` +module github.com/mathiasbq/hyperguild/ingestion + +go 1.26.1 + +require github.com/stretchr/testify v1.11.1 + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) +``` + +Save as `ingestion/go.mod`. Then run: +```bash +cd ingestion && go mod tidy +``` + +- [ ] **Step 2: Create the server entry point** + +```go +// ingestion/cmd/server/main.go +package main + +import ( + "log/slog" + "net/http" + "os" + + "github.com/mathiasbq/hyperguild/ingestion/internal/api" +) + +func main() { + logger := slog.New(slog.NewJSONHandler(os.Stdout, nil)) + + brainDir := os.Getenv("INGEST_BRAIN_DIR") + if brainDir == "" { + brainDir = "../brain" + } + + port := os.Getenv("INGEST_PORT") + if port == "" { + port = "3300" + } + + h := api.NewHandler(brainDir, logger) + + mux := http.NewServeMux() + mux.HandleFunc("/query", h.Query) + mux.HandleFunc("/write", h.Write) + + addr := ":" + port + logger.Info("ingestion server starting", "addr", addr, "brain_dir", brainDir) + if err := http.ListenAndServe(addr, mux); err != nil { + logger.Error("server stopped", "err", err) + os.Exit(1) + } +} +``` + +- [ ] **Step 3: Verify it compiles (handler not yet written — expect error)** + +```bash +cd ingestion && go build ./... 
2>&1 +``` + +Expected: error about missing `api` package. That's correct — move to Task 2. + +- [ ] **Step 4: Commit scaffold** + +```bash +git add ingestion/ +git commit -m "chore: scaffold ingestion Go module" +``` + +--- + +## Task 2: ingestion search package + +**Files:** +- Create: `ingestion/internal/search/search.go` +- Create: `ingestion/internal/search/search_test.go` + +- [ ] **Step 1: Write the failing test** + +```go +// ingestion/internal/search/search_test.go +package search_test + +import ( + "os" + "path/filepath" + "testing" + + "github.com/mathiasbq/hyperguild/ingestion/internal/search" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSearch_ReturnsMatchingPages(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755)) + + // Write a concept page mentioning "retry" + require.NoError(t, os.WriteFile( + filepath.Join(dir, "wiki", "concepts", "retry-logic.md"), + []byte("---\ntitle: Retry Logic\ndomain: software\n---\n\nRetry logic handles transient failures by re-attempting operations.\n"), + 0o644, + )) + // Write an unrelated page + require.NoError(t, os.WriteFile( + filepath.Join(dir, "wiki", "concepts", "database.md"), + []byte("---\ntitle: Database\ndomain: software\n---\n\nA database stores structured data.\n"), + 0o644, + )) + + results, err := search.Query(dir, "retry transient", 5) + require.NoError(t, err) + require.Len(t, results, 1) + assert.Equal(t, "wiki/concepts/retry-logic.md", results[0].Path) + assert.Equal(t, "Retry Logic", results[0].Title) + assert.Greater(t, results[0].Score, 0) + assert.Contains(t, results[0].Excerpt, "Retry") +} + +func TestSearch_RespectsLimit(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755)) + for i := 0; i < 5; i++ { + require.NoError(t, os.WriteFile( + filepath.Join(dir, "wiki", "concepts", fmt.Sprintf("page-%d.md", i)), + 
[]byte(fmt.Sprintf("---\ntitle: Page %d\n---\n\nThis page mentions retry.\n", i)), + 0o644, + )) + } + results, err := search.Query(dir, "retry", 3) + require.NoError(t, err) + assert.LessOrEqual(t, len(results), 3) +} +``` + +Add `"fmt"` import. Run: +```bash +cd ingestion && go test ./internal/search/... 2>&1 +``` +Expected: FAIL — `search` package does not exist. + +- [ ] **Step 2: Implement search.go** + +```go +// ingestion/internal/search/search.go +package search + +import ( + "bufio" + "os" + "path/filepath" + "sort" + "strings" +) + +// Result is a single search hit from the brain wiki. +type Result struct { + Path string `json:"path"` + Title string `json:"title"` + Excerpt string `json:"excerpt"` + Score int `json:"score"` +} + +// Query searches all .md files under brainDir/wiki/ for pages containing +// any of the whitespace-separated terms in query. Returns up to limit results +// sorted by score descending. +func Query(brainDir, query string, limit int) ([]Result, error) { + if limit <= 0 { + limit = 5 + } + terms := strings.Fields(strings.ToLower(query)) + if len(terms) == 0 { + return nil, nil + } + + var results []Result + + err := filepath.WalkDir(filepath.Join(brainDir, "wiki"), func(path string, d os.DirEntry, err error) error { + if err != nil || d.IsDir() || !strings.HasSuffix(path, ".md") { + return err + } + + content, err := os.ReadFile(path) + if err != nil { + return nil // skip unreadable files + } + + lower := strings.ToLower(string(content)) + score := 0 + for _, term := range terms { + score += strings.Count(lower, term) + } + if score == 0 { + return nil + } + + rel, _ := filepath.Rel(brainDir, path) + rel = filepath.ToSlash(rel) + + results = append(results, Result{ + Path: rel, + Title: extractTitle(string(content), d.Name()), + Excerpt: excerpt(string(content), 300), + Score: score, + }) + return nil + }) + if err != nil { + return nil, err + } + + sort.Slice(results, func(i, j int) bool { + return results[i].Score > 
results[j].Score + }) + if len(results) > limit { + results = results[:limit] + } + return results, nil +} + +func extractTitle(content, filename string) string { + scanner := bufio.NewScanner(strings.NewReader(content)) + inFrontmatter := false + for scanner.Scan() { + line := scanner.Text() + if strings.TrimSpace(line) == "---" { + if !inFrontmatter { + inFrontmatter = true + continue + } + break + } + if inFrontmatter { + key, val, ok := strings.Cut(line, ":") + if ok && strings.TrimSpace(key) == "title" { + return strings.Trim(strings.TrimSpace(val), `"'`) + } + } + } + return strings.TrimSuffix(filename, ".md") +} + +func excerpt(content string, maxLen int) string { + // Skip frontmatter, return first maxLen chars of body. + parts := strings.SplitN(content, "---", 3) + body := content + if len(parts) == 3 { + body = strings.TrimSpace(parts[2]) + } + if len(body) > maxLen { + return body[:maxLen] + "…" + } + return body +} +``` + +- [ ] **Step 3: Run tests — expect PASS** + +```bash +cd ingestion && go test ./internal/search/... -v 2>&1 +``` +Expected: PASS (2 tests). 
+ +- [ ] **Step 4: Commit** + +```bash +git add ingestion/internal/search/ +git commit -m "feat(ingestion): add full-text wiki search package" +``` + +--- + +## Task 3: ingestion API handler + +**Files:** +- Create: `ingestion/internal/api/handler.go` +- Create: `ingestion/internal/api/handler_test.go` + +- [ ] **Step 1: Write the failing tests** + +```go +// ingestion/internal/api/handler_test.go +package api_test + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/mathiasbq/hyperguild/ingestion/internal/api" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "log/slog" +) + +func setup(t *testing.T) (string, *api.Handler) { + t.Helper() + dir := t.TempDir() + require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755)) + require.NoError(t, os.MkdirAll(filepath.Join(dir, "raw"), 0o755)) + require.NoError(t, os.WriteFile( + filepath.Join(dir, "wiki", "concepts", "tdd.md"), + []byte("---\ntitle: TDD\ndomain: software\n---\n\nTest-driven development is a discipline.\n"), + 0o644, + )) + logger := slog.New(slog.NewTextHandler(os.Stderr, nil)) + return dir, api.NewHandler(dir, logger) +} + +func TestQuery_ReturnsResults(t *testing.T) { + _, h := setup(t) + body, _ := json.Marshal(map[string]any{"query": "test driven", "limit": 5}) + req := httptest.NewRequest(http.MethodPost, "/query", bytes.NewReader(body)) + rec := httptest.NewRecorder() + + h.Query(rec, req) + + assert.Equal(t, http.StatusOK, rec.Code) + var resp map[string]any + require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp)) + results := resp["results"].([]any) + assert.NotEmpty(t, results) +} + +func TestWrite_CreatesRawFile(t *testing.T) { + dir, h := setup(t) + body, _ := json.Marshal(map[string]any{ + "content": "# Test note\n\nSome content.", + "filename": "test-note.md", + }) + req := httptest.NewRequest(http.MethodPost, "/write", bytes.NewReader(body)) + rec := 
httptest.NewRecorder() + + h.Write(rec, req) + + assert.Equal(t, http.StatusOK, rec.Code) + var resp map[string]string + require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp)) + assert.NotEmpty(t, resp["path"]) + + written := filepath.Join(dir, "raw", "test-note.md") + content, err := os.ReadFile(written) + require.NoError(t, err) + assert.Contains(t, string(content), "Some content.") +} + +func TestWrite_GeneratesFilenameIfAbsent(t *testing.T) { + dir, h := setup(t) + body, _ := json.Marshal(map[string]any{"content": "auto name"}) + req := httptest.NewRequest(http.MethodPost, "/write", bytes.NewReader(body)) + rec := httptest.NewRecorder() + + h.Write(rec, req) + + assert.Equal(t, http.StatusOK, rec.Code) + entries, _ := os.ReadDir(filepath.Join(dir, "raw")) + assert.Len(t, entries, 1) + assert.True(t, strings.HasSuffix(entries[0].Name(), ".md")) +} +``` + +Run: +```bash +cd ingestion && go test ./internal/api/... 2>&1 +``` +Expected: FAIL — package `api` does not exist. + +- [ ] **Step 2: Implement handler.go** + +```go +// ingestion/internal/api/handler.go +package api + +import ( + "encoding/json" + "fmt" + "log/slog" + "net/http" + "os" + "path/filepath" + "time" + + "github.com/mathiasbq/hyperguild/ingestion/internal/search" +) + +// Handler serves the ingestion HTTP API. +type Handler struct { + brainDir string + logger *slog.Logger +} + +// NewHandler constructs a Handler. brainDir is the absolute path to brain/. +func NewHandler(brainDir string, logger *slog.Logger) *Handler { + return &Handler{brainDir: brainDir, logger: logger} +} + +type queryRequest struct { + Query string `json:"query"` + Domain string `json:"domain,omitempty"` + Limit int `json:"limit,omitempty"` +} + +type writeRequest struct { + Content string `json:"content"` + Filename string `json:"filename,omitempty"` +} + +// Query handles POST /query — full-text search across the brain wiki. 
+func (h *Handler) Query(w http.ResponseWriter, r *http.Request) { + var req queryRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, "invalid JSON", http.StatusBadRequest) + return + } + if req.Limit == 0 { + req.Limit = 5 + } + + results, err := search.Query(h.brainDir, req.Query, req.Limit) + if err != nil { + h.logger.Error("query failed", "err", err) + http.Error(w, "search error", http.StatusInternalServerError) + return + } + + writeJSON(w, map[string]any{"results": results}) +} + +// Write handles POST /write — write raw content to brain/raw/. +func (h *Handler) Write(w http.ResponseWriter, r *http.Request) { + var req writeRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, "invalid JSON", http.StatusBadRequest) + return + } + if req.Content == "" { + http.Error(w, "content is required", http.StatusBadRequest) + return + } + + filename := req.Filename + if filename == "" { + filename = fmt.Sprintf("%s-auto.md", time.Now().UTC().Format("2006-01-02-150405")) + } + + rawDir := filepath.Join(h.brainDir, "raw") + if err := os.MkdirAll(rawDir, 0o755); err != nil { + http.Error(w, "failed to create raw dir", http.StatusInternalServerError) + return + } + + dest := filepath.Join(rawDir, filepath.Base(filename)) + if err := os.WriteFile(dest, []byte(req.Content), 0o644); err != nil { + h.logger.Error("write failed", "err", err) + http.Error(w, "write error", http.StatusInternalServerError) + return + } + + rel, _ := filepath.Rel(h.brainDir, dest) + writeJSON(w, map[string]string{"path": filepath.ToSlash(rel)}) +} + +func writeJSON(w http.ResponseWriter, v any) { + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(v) +} +``` + +- [ ] **Step 3: Run tests — expect PASS** + +```bash +cd ingestion && go test ./internal/api/... -v 2>&1 +``` +Expected: PASS (3 tests). + +- [ ] **Step 4: Verify full build** + +```bash +cd ingestion && go build ./... 
2>&1 +``` +Expected: clean. + +- [ ] **Step 5: Commit** + +```bash +git add ingestion/internal/ +git commit -m "feat(ingestion): add query and write HTTP handlers" +``` + +--- + +## Task 4: internal/tier — tier detection + +**Files:** +- Create: `internal/tier/tier.go` +- Create: `internal/tier/tier_test.go` + +- [ ] **Step 1: Write the failing tests** + +```go +// internal/tier/tier_test.go +package tier_test + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + + "github.com/mathiasbq/supervisor/internal/tier" + "github.com/stretchr/testify/assert" +) + +func TestDetect_Tier1_WhenBothReachable(t *testing.T) { + anthropic := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer anthropic.Close() + + litellm := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer litellm.Close() + + info := tier.Detect(context.Background(), anthropic.URL, litellm.URL) + assert.Equal(t, tier.Full, info.Tier) + assert.Equal(t, "full-online", info.Label) + assert.True(t, info.ManagedAgents) +} + +func TestDetect_Tier2_WhenOnlyLiteLLMReachable(t *testing.T) { + litellm := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer litellm.Close() + + info := tier.Detect(context.Background(), "http://127.0.0.1:1", litellm.URL) + assert.Equal(t, tier.LANOnly, info.Tier) + assert.Equal(t, "lan-only", info.Label) + assert.False(t, info.ManagedAgents) +} + +func TestDetect_Tier3_WhenNeitherReachable(t *testing.T) { + info := tier.Detect(context.Background(), "http://127.0.0.1:1", "http://127.0.0.1:2") + assert.Equal(t, tier.Airplane, info.Tier) + assert.Equal(t, "airplane", info.Label) + assert.False(t, info.ManagedAgents) +} +``` + +Run: +```bash +go test ./internal/tier/... 2>&1 +``` +Expected: FAIL — package does not exist. 
+ +- [ ] **Step 2: Implement tier.go** + +```go +// internal/tier/tier.go +package tier + +import ( + "context" + "net/http" + "time" +) + +// Tier represents the current operating capability level. +type Tier int + +const ( + Full Tier = 1 // internet + Anthropic API reachable + LANOnly Tier = 2 // LiteLLM on LAN reachable, no internet + Airplane Tier = 3 // no network +) + +// Info describes the current operating tier. +type Info struct { + Tier Tier `json:"tier"` + Label string `json:"label"` + AvailableModels []string `json:"available_models"` + ManagedAgents bool `json:"managed_agents"` +} + +// Detect probes the Anthropic endpoint and LiteLLM and returns the current tier. +// probeTimeout is 2 seconds per probe. +func Detect(ctx context.Context, anthropicProbe, liteLLMBaseURL string) Info { + client := &http.Client{Timeout: 2 * time.Second} + + if probe(ctx, client, anthropicProbe) { + return Info{ + Tier: Full, + Label: "full-online", + ManagedAgents: true, + } + } + if probe(ctx, client, liteLLMBaseURL) { + return Info{ + Tier: LANOnly, + Label: "lan-only", + ManagedAgents: false, + } + } + return Info{ + Tier: Airplane, + Label: "airplane", + ManagedAgents: false, + } +} + +func probe(ctx context.Context, client *http.Client, url string) bool { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return false + } + resp, err := client.Do(req) + if err != nil { + return false + } + resp.Body.Close() + return true +} +``` + +- [ ] **Step 3: Run tests — expect PASS** + +```bash +go test ./internal/tier/... -v 2>&1 +``` +Expected: PASS (3 tests). Note: each failed probe is bounded by the 2s client timeout, so the Tier2 test takes at most ~2s and the Tier3 test at most ~4s; they usually finish much faster because closed loopback ports (1 and 2) typically refuse the connection immediately.
+ +- [ ] **Step 4: Commit** + +```bash +git add internal/tier/ +git commit -m "feat: add tier detection package" +``` + +--- + +## Task 5: internal/session — session log + +**Files:** +- Create: `internal/session/session.go` +- Create: `internal/session/session_test.go` + +- [ ] **Step 1: Write the failing tests** + +```go +// internal/session/session_test.go +package session_test + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" + "time" + + "github.com/mathiasbq/supervisor/internal/session" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestAppend_WritesJSONLEntry(t *testing.T) { + dir := t.TempDir() + entry := session.Entry{ + SessionID: "test-session-1", + Timestamp: time.Now().UTC(), + Skill: "tdd_green", + Phase: "green", + ProjectRoot: "/tmp/myproject", + FinalStatus: "pass", + ModelUsed: "ollama/qwen3", + DurationMs: 5000, + } + + require.NoError(t, session.Append(dir, "test-session-1", entry)) + + path := filepath.Join(dir, "test-session-1.jsonl") + data, err := os.ReadFile(path) + require.NoError(t, err) + + var got session.Entry + require.NoError(t, json.Unmarshal(data, &got)) + assert.Equal(t, "test-session-1", got.SessionID) + assert.Equal(t, "tdd_green", got.Skill) + assert.Equal(t, "pass", got.FinalStatus) +} + +func TestAppend_AppendsMultipleEntries(t *testing.T) { + dir := t.TempDir() + for i := 0; i < 3; i++ { + require.NoError(t, session.Append(dir, "s1", session.Entry{ + SessionID: "s1", + Timestamp: time.Now().UTC(), + Skill: "tdd_red", + FinalStatus: "pass", + })) + } + + entries, err := session.Read(dir, "s1") + require.NoError(t, err) + assert.Len(t, entries, 3) +} + +func TestRead_EmptyWhenNoFile(t *testing.T) { + dir := t.TempDir() + entries, err := session.Read(dir, "missing") + require.NoError(t, err) + assert.Empty(t, entries) +} +``` + +Run: +```bash +go test ./internal/session/... 2>&1 +``` +Expected: FAIL. 
+ +- [ ] **Step 2: Implement session.go** + +```go +// internal/session/session.go +package session + +import ( + "bufio" + "encoding/json" + "fmt" + "os" + "path/filepath" + "time" +) + +// Entry is one skill invocation record, appended to the session JSONL log. +type Entry struct { + SessionID string `json:"session_id"` + Timestamp time.Time `json:"timestamp"` + Skill string `json:"skill"` + Phase string `json:"phase,omitempty"` + ProjectRoot string `json:"project_root,omitempty"` + Input json.RawMessage `json:"input,omitempty"` + Attempts []Attempt `json:"attempts,omitempty"` + FinalStatus string `json:"final_status"` + FilePath string `json:"file_path,omitempty"` + ModelUsed string `json:"model_used,omitempty"` + DurationMs int64 `json:"duration_ms,omitempty"` +} + +// Attempt represents one subprocess invocation within a skill call. +type Attempt struct { + Attempt int `json:"attempt"` + Model string `json:"model"` + OutputSummary string `json:"output_summary,omitempty"` + RunnerOutput string `json:"runner_output,omitempty"` + Verified bool `json:"verified"` +} + +// Append writes entry as a single JSON line to sessionsDir/{sessionID}.jsonl. +func Append(sessionsDir, sessionID string, entry Entry) error { + if err := os.MkdirAll(sessionsDir, 0o755); err != nil { + return fmt.Errorf("create sessions dir: %w", err) + } + path := filepath.Join(sessionsDir, sessionID+".jsonl") + f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) + if err != nil { + return fmt.Errorf("open session log: %w", err) + } + defer f.Close() + + line, err := json.Marshal(entry) + if err != nil { + return fmt.Errorf("marshal entry: %w", err) + } + _, err = fmt.Fprintf(f, "%s\n", line) + return err +} + +// Read returns all entries for sessionID. Returns empty slice if no log exists. 
+func Read(sessionsDir, sessionID string) ([]Entry, error) { + path := filepath.Join(sessionsDir, sessionID+".jsonl") + f, err := os.Open(path) + if os.IsNotExist(err) { + return nil, nil + } + if err != nil { + return nil, fmt.Errorf("open session log: %w", err) + } + defer f.Close() + + var entries []Entry + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := scanner.Bytes() + if len(line) == 0 { + continue + } + var e Entry + if err := json.Unmarshal(line, &e); err != nil { + return nil, fmt.Errorf("parse entry: %w", err) + } + entries = append(entries, e) + } + return entries, scanner.Err() +} +``` + +- [ ] **Step 3: Run tests — expect PASS** + +```bash +go test ./internal/session/... -v 2>&1 +``` +Expected: PASS (3 tests). + +- [ ] **Step 4: Commit** + +```bash +git add internal/session/ +git commit -m "feat: add session log package (append/read JSONL)" +``` + +--- + +## Task 6: brain skill (brain_query, brain_write MCP tools) + +**Files:** +- Create: `internal/skills/brain/skill.go` +- Create: `internal/skills/brain/handlers.go` +- Create: `internal/skills/brain/handlers_test.go` + +- [ ] **Step 1: Write the failing tests** + +```go +// internal/skills/brain/handlers_test.go +package brain_test + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/mathiasbq/supervisor/internal/skills/brain" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestHandle_BrainQuery_CallsIngestServer(t *testing.T) { + called := false + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/query", r.URL.Path) + called = true + json.NewEncoder(w).Encode(map[string]any{ + "results": []map[string]any{ + {"path": "wiki/concepts/tdd.md", "title": "TDD", "excerpt": "Test-driven development.", "score": 3}, + }, + }) + })) + defer srv.Close() + + s := brain.New(brain.Config{IngestBaseURL: srv.URL}) + args, _ := 
json.Marshal(map[string]string{"query": "test driven development"}) + out, err := s.Handle(context.Background(), "brain_query", args) + require.NoError(t, err) + assert.True(t, called) + + var result map[string]any + require.NoError(t, json.Unmarshal(out, &result)) + results := result["results"].([]any) + assert.Len(t, results, 1) +} + +func TestHandle_BrainWrite_CallsIngestServer(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/write", r.URL.Path) + json.NewEncoder(w).Encode(map[string]string{"path": "raw/test.md"}) + })) + defer srv.Close() + + s := brain.New(brain.Config{IngestBaseURL: srv.URL}) + args, _ := json.Marshal(map[string]string{"content": "# Test\n\nSome learning.", "type": "concept"}) + out, err := s.Handle(context.Background(), "brain_write", args) + require.NoError(t, err) + var result map[string]string + require.NoError(t, json.Unmarshal(out, &result)) + assert.Equal(t, "raw/test.md", result["path"]) +} + +func TestHandle_UnknownTool_ReturnsError(t *testing.T) { + s := brain.New(brain.Config{IngestBaseURL: "http://localhost:3300"}) + _, err := s.Handle(context.Background(), "brain_unknown", nil) + assert.Error(t, err) +} +``` + +Run: +```bash +go test ./internal/skills/brain/... 2>&1 +``` +Expected: FAIL. + +- [ ] **Step 2: Implement skill.go** + +```go +// internal/skills/brain/skill.go +package brain + +import ( + "context" + "encoding/json" + + "github.com/mathiasbq/supervisor/internal/registry" +) + +// Config holds brain skill configuration. +type Config struct { + IngestBaseURL string // base URL of the ingestion HTTP server +} + +// Skill implements registry.Skill for brain_query and brain_write. 
+type Skill struct { + cfg Config +} + +func New(cfg Config) *Skill { return &Skill{cfg: cfg} } + +func (s *Skill) Name() string { return "brain" } + +func (s *Skill) Tools() []registry.ToolDef { + schema := func(required []string, props map[string]any) json.RawMessage { + b, _ := json.Marshal(map[string]any{"type": "object", "required": required, "properties": props}) + return b + } + str := map[string]any{"type": "string"} + num := map[string]any{"type": "integer"} + + return []registry.ToolDef{ + { + Name: "brain_query", + Description: "Search the hyperguild brain wiki for relevant knowledge. Call this before starting any significant task.", + InputSchema: schema([]string{"query"}, map[string]any{ + "query": str, + "domain": str, + "limit": num, + }), + }, + { + Name: "brain_write", + Description: "Write a raw knowledge note to the brain for later ingestion into the wiki.", + InputSchema: schema([]string{"content"}, map[string]any{ + "content": str, + "type": str, + "domain": str, + "filename": str, + }), + }, + } +} +``` + +- [ ] **Step 3: Implement handlers.go** + +```go +// internal/skills/brain/handlers.go +package brain + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" +) + +func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + switch tool { + case "brain_query": + return s.query(ctx, args) + case "brain_write": + return s.write(ctx, args) + default: + return nil, fmt.Errorf("unknown brain tool: %s", tool) + } +} + +type queryArgs struct { + Query string `json:"query"` + Domain string `json:"domain,omitempty"` + Limit int `json:"limit,omitempty"` +} + +func (s *Skill) query(ctx context.Context, args json.RawMessage) (json.RawMessage, error) { + var a queryArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.Query == "" { + return nil, fmt.Errorf("query is required") + } + if a.Limit == 0 { + a.Limit = 5 + } + 
return s.post(ctx, "/query", a) +} + +type writeArgs struct { + Content string `json:"content"` + Type string `json:"type,omitempty"` + Domain string `json:"domain,omitempty"` + Filename string `json:"filename,omitempty"` +} + +func (s *Skill) write(ctx context.Context, args json.RawMessage) (json.RawMessage, error) { + var a writeArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.Content == "" { + return nil, fmt.Errorf("content is required") + } + return s.post(ctx, "/write", map[string]string{ + "content": a.Content, + "filename": a.Filename, + }) +} + +func (s *Skill) post(ctx context.Context, path string, body any) (json.RawMessage, error) { + b, err := json.Marshal(body) + if err != nil { + return nil, fmt.Errorf("marshal request: %w", err) + } + req, err := http.NewRequestWithContext(ctx, http.MethodPost, s.cfg.IngestBaseURL+path, bytes.NewReader(b)) + if err != nil { + return nil, fmt.Errorf("build request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, fmt.Errorf("call ingestion server: %w", err) + } + defer resp.Body.Close() + + out, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("read response: %w", err) + } + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("ingestion server returned %d: %s", resp.StatusCode, out) + } + return json.RawMessage(out), nil +} +``` + +- [ ] **Step 4: Run tests — expect PASS** + +```bash +go test ./internal/skills/brain/... -v 2>&1 +``` +Expected: PASS (3 tests). 
+ +- [ ] **Step 5: Commit** + +```bash +git add internal/skills/brain/ +git commit -m "feat: add brain_query and brain_write MCP tools" +``` + +--- + +## Task 7: org skill (tier tool) + sessionlog skill (session_log tool) + +**Files:** +- Create: `internal/skills/org/skill.go` +- Create: `internal/skills/org/handlers.go` +- Create: `internal/skills/org/handlers_test.go` +- Create: `internal/skills/sessionlog/skill.go` +- Create: `internal/skills/sessionlog/handlers.go` +- Create: `internal/skills/sessionlog/handlers_test.go` + +- [ ] **Step 1: Write failing tests for org skill** + +```go +// internal/skills/org/handlers_test.go +package org_test + +import ( + "context" + "encoding/json" + "testing" + + "github.com/mathiasbq/supervisor/internal/skills/org" + "github.com/mathiasbq/supervisor/internal/tier" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestHandle_Tier_ReturnsTierInfo(t *testing.T) { + s := org.New(org.Config{ + TierFn: func(ctx context.Context) tier.Info { + return tier.Info{Tier: tier.LANOnly, Label: "lan-only", ManagedAgents: false} + }, + }) + out, err := s.Handle(context.Background(), "tier", nil) + require.NoError(t, err) + + var info tier.Info + require.NoError(t, json.Unmarshal(out, &info)) + assert.Equal(t, tier.LANOnly, info.Tier) + assert.Equal(t, "lan-only", info.Label) + assert.False(t, info.ManagedAgents) +} +``` + +Run: +```bash +go test ./internal/skills/org/... 2>&1 +``` +Expected: FAIL. + +- [ ] **Step 2: Implement org skill** + +```go +// internal/skills/org/skill.go +package org + +import ( + "context" + "encoding/json" + + "github.com/mathiasbq/supervisor/internal/registry" + "github.com/mathiasbq/supervisor/internal/tier" +) + +// TierFn is a function that returns the current tier. Injected for testability. +type TierFn func(ctx context.Context) tier.Info + +// Config holds org skill configuration. 
+type Config struct { + TierFn TierFn +} + +// Skill implements registry.Skill for the tier tool. +type Skill struct { + cfg Config +} + +func New(cfg Config) *Skill { return &Skill{cfg: cfg} } + +func (s *Skill) Name() string { return "org" } + +func (s *Skill) Tools() []registry.ToolDef { + return []registry.ToolDef{ + { + Name: "tier", + Description: "Returns the current operating tier: 1=full-online (Claude+Ollama+Managed Agents), 2=lan-only (Ollama only), 3=airplane (minimal). Call at session start to know which models and capabilities are available.", + InputSchema: json.RawMessage(`{"type":"object","properties":{}}`), + }, + } +} +``` + +```go +// internal/skills/org/handlers.go +package org + +import ( + "context" + "encoding/json" + "fmt" +) + +func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "tier" { + return nil, fmt.Errorf("unknown org tool: %s", tool) + } + info := s.cfg.TierFn(ctx) + b, err := json.Marshal(info) + if err != nil { + return nil, fmt.Errorf("marshal tier info: %w", err) + } + return b, nil +} +``` + +- [ ] **Step 3: Write failing tests for sessionlog skill** + +```go +// internal/skills/sessionlog/handlers_test.go +package sessionlog_test + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + "testing" + + "github.com/mathiasbq/supervisor/internal/skills/sessionlog" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestHandle_SessionLog_AppendsEntry(t *testing.T) { + dir := t.TempDir() + s := sessionlog.New(sessionlog.Config{SessionsDir: dir}) + + args, _ := json.Marshal(map[string]any{ + "session_id": "sess-abc", + "skill": "tdd_green", + "final_status": "pass", + "model_used": "ollama/qwen3", + "duration_ms": 3000, + }) + out, err := s.Handle(context.Background(), "session_log", args) + require.NoError(t, err) + var result map[string]string + require.NoError(t, json.Unmarshal(out, &result)) + assert.Equal(t, "ok", 
result["status"]) + + // Verify file written + data, err := os.ReadFile(filepath.Join(dir, "sess-abc.jsonl")) + require.NoError(t, err) + assert.Contains(t, string(data), "tdd_green") +} + +func TestHandle_SessionLog_RequiresSessionID(t *testing.T) { + s := sessionlog.New(sessionlog.Config{SessionsDir: t.TempDir()}) + args, _ := json.Marshal(map[string]any{"skill": "tdd_red"}) + _, err := s.Handle(context.Background(), "session_log", args) + assert.Error(t, err) +} +``` + +Run: +```bash +go test ./internal/skills/sessionlog/... 2>&1 +``` +Expected: FAIL. + +- [ ] **Step 4: Implement sessionlog skill** + +```go +// internal/skills/sessionlog/skill.go +package sessionlog + +import ( + "context" + "encoding/json" + + "github.com/mathiasbq/supervisor/internal/registry" +) + +// Config holds sessionlog skill configuration. +type Config struct { + SessionsDir string // path to brain/sessions/ +} + +// Skill implements registry.Skill for the session_log tool. +type Skill struct { + cfg Config +} + +func New(cfg Config) *Skill { return &Skill{cfg: cfg} } + +func (s *Skill) Name() string { return "sessionlog" } + +func (s *Skill) Tools() []registry.ToolDef { + return []registry.ToolDef{ + { + Name: "session_log", + Description: "Append a structured entry to the current session log. 
Call after each skill invocation completes to record what happened for retrospective and training data extraction.", + InputSchema: json.RawMessage(`{ + "type": "object", + "required": ["session_id"], + "properties": { + "session_id": {"type": "string"}, + "skill": {"type": "string"}, + "phase": {"type": "string"}, + "project_root": {"type": "string"}, + "final_status": {"type": "string"}, + "file_path": {"type": "string"}, + "model_used": {"type": "string"}, + "duration_ms": {"type": "integer"}, + "message": {"type": "string"} + } + }`), + }, + } +} +``` + +```go +// internal/skills/sessionlog/handlers.go +package sessionlog + +import ( + "context" + "encoding/json" + "fmt" + "time" + + "github.com/mathiasbq/supervisor/internal/session" +) + +type logArgs struct { + SessionID string `json:"session_id"` + Skill string `json:"skill"` + Phase string `json:"phase,omitempty"` + ProjectRoot string `json:"project_root,omitempty"` + FinalStatus string `json:"final_status,omitempty"` + FilePath string `json:"file_path,omitempty"` + ModelUsed string `json:"model_used,omitempty"` + DurationMs int64 `json:"duration_ms,omitempty"` + Message string `json:"message,omitempty"` +} + +func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "session_log" { + return nil, fmt.Errorf("unknown sessionlog tool: %s", tool) + } + var a logArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.SessionID == "" { + return nil, fmt.Errorf("session_id is required") + } + + entry := session.Entry{ + SessionID: a.SessionID, + Timestamp: time.Now().UTC(), + Skill: a.Skill, + Phase: a.Phase, + ProjectRoot: a.ProjectRoot, + FinalStatus: a.FinalStatus, + FilePath: a.FilePath, + ModelUsed: a.ModelUsed, + DurationMs: a.DurationMs, + } + if err := session.Append(s.cfg.SessionsDir, a.SessionID, entry); err != nil { + return nil, fmt.Errorf("append session log: %w", err) + } + b, _ := 
json.Marshal(map[string]string{"status": "ok", "session_id": a.SessionID}) + return b, nil +} +``` + +- [ ] **Step 5: Run all new tests** + +```bash +go test ./internal/skills/org/... ./internal/skills/sessionlog/... -v 2>&1 +``` +Expected: PASS (all tests). + +- [ ] **Step 6: Commit** + +```bash +git add internal/skills/org/ internal/skills/sessionlog/ +git commit -m "feat: add tier and session_log MCP tools" +``` + +--- + +## Task 8: retrospective skill + +**Files:** +- Create: `internal/skills/retrospective/skill.go` +- Create: `internal/skills/retrospective/handlers.go` +- Create: `internal/skills/retrospective/handlers_test.go` + +- [ ] **Step 1: Write the failing tests** + +```go +// internal/skills/retrospective/handlers_test.go +package retrospective_test + +import ( + "context" + "encoding/json" + "testing" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/skills/retrospective" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestHandle_Retrospective_RequiresSessionID(t *testing.T) { + s := retrospective.New(retrospective.Config{}) + _, err := s.Handle(context.Background(), "retrospective", json.RawMessage(`{}`)) + assert.Error(t, err) + assert.Contains(t, err.Error(), "session_id") +} + +func TestHandle_Retrospective_BuildsPromptWithSessionLog(t *testing.T) { + var capturedReq iexec.Request + s := retrospective.New(retrospective.Config{ + SkillPrompt: "retrospective discipline", + DefaultModel: "ollama/test", + SessionsDir: "testdata", + ExecutorFn: func(_ context.Context, req iexec.Request) (iexec.Result, error) { + capturedReq = req + return iexec.Result{ + Status: "pass", + Phase: "retrospective", + Skill: "retrospective", + Verified: true, + Message: "wrote 2 entries to brain", + }, nil + }, + }) + + args, _ := json.Marshal(map[string]string{"session_id": "empty-session"}) + out, err := s.Handle(context.Background(), "retrospective", args) + require.NoError(t, 
err) + + var result iexec.Result + require.NoError(t, json.Unmarshal(out, &result)) + assert.Equal(t, "pass", result.Status) + assert.Contains(t, capturedReq.SkillPrompt, "retrospective discipline") + assert.Contains(t, capturedReq.TaskPrompt, "empty-session") +} +``` + +Run: +```bash +go test ./internal/skills/retrospective/... 2>&1 +``` +Expected: FAIL. + +- [ ] **Step 2: Implement skill.go** + +```go +// internal/skills/retrospective/skill.go +package retrospective + +import ( + "context" + "encoding/json" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/registry" +) + +// ExecutorFn allows injecting a test double. +type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error) + +// Config holds retrospective skill configuration. +type Config struct { + SkillPrompt string + DefaultModel string + SessionsDir string // path to brain/sessions/ + ExecutorFn ExecutorFn +} + +// Skill implements registry.Skill for the retrospective tool. +type Skill struct { + cfg Config +} + +func New(cfg Config) *Skill { return &Skill{cfg: cfg} } + +func (s *Skill) Name() string { return "retrospective" } + +func (s *Skill) Tools() []registry.ToolDef { + return []registry.ToolDef{ + { + Name: "retrospective", + Description: "Run a retrospective on a completed session. Reads the session log, identifies novel learnings, and writes structured entries to the brain for ingestion. 
Call at the end of each coding session.", + InputSchema: json.RawMessage(`{ + "type": "object", + "required": ["session_id"], + "properties": { + "session_id": {"type": "string"}, + "model": {"type": "string"} + } + }`), + }, + } +} +``` + +- [ ] **Step 3: Implement handlers.go** + +```go +// internal/skills/retrospective/handlers.go +package retrospective + +import ( + "context" + "encoding/json" + "fmt" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" +) + +type retroArgs struct { + SessionID string `json:"session_id"` + Model string `json:"model,omitempty"` +} + +func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "retrospective" { + return nil, fmt.Errorf("unknown retrospective tool: %s", tool) + } + var a retroArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.SessionID == "" { + return nil, fmt.Errorf("session_id is required") + } + + model := a.Model + if model == "" { + model = s.cfg.DefaultModel + } + + // Read session log entries. + entries, err := session.Read(s.cfg.SessionsDir, a.SessionID) + if err != nil { + return nil, fmt.Errorf("read session log: %w", err) + } + + logJSON, _ := json.MarshalIndent(entries, "", " ") + taskPrompt := fmt.Sprintf( + "SESSION_ID: %s\n\nSESSION_LOG:\n%s\n\nReview this session log. Identify what is novel or worth preserving as organizational knowledge. Write structured entries to brain/raw/ via brain_write. 
Return JSON result when done.", + a.SessionID, string(logJSON), + ) + + if s.cfg.ExecutorFn == nil { + return nil, fmt.Errorf("no executor configured") + } + result, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.SkillPrompt, + TaskPrompt: taskPrompt, + Model: model, + Tools: "Bash,Read,Write", + }) + if err != nil { + return nil, fmt.Errorf("retrospective worker: %w", err) + } + + b, err := json.Marshal(result) + if err != nil { + return nil, fmt.Errorf("marshal result: %w", err) + } + return b, nil +} +``` + +- [ ] **Step 4: Run tests — expect PASS** + +```bash +go test ./internal/skills/retrospective/... -v 2>&1 +``` +Expected: PASS (2 tests). + +- [ ] **Step 5: Commit** + +```bash +git add internal/skills/retrospective/ +git commit -m "feat: add retrospective MCP tool" +``` + +--- + +## Task 9: config files and brain directory structure + +**Files:** +- Create: `config/supervisor/protocols.md` +- Create: `config/supervisor/retrospective.md` +- Create: `brain/` directory structure (gitkeep files) + +- [ ] **Step 1: Write protocols.md** + +```markdown + +# The Hyperguild Way + +These protocols are injected into every worker invocation. They define how you behave as a member of the hyperguild. + +## Output contract + +Every response is raw JSON matching the response schema. No preamble, no prose, no markdown. Malformed output is treated as a failed invocation. + +## Quality gate + +`verified: true` only when a subprocess exit code confirms the outcome. Never self-assess. "I think the tests pass" is not verified. + +## Escalation + +If stuck after 3 attempts, return `status: error` with a clear `message` explaining why. Do not retry silently. Do not fabricate a passing result. + +## Working offline + +If brain context is absent from your prompt, proceed using your discipline file only. Note the gap in your `message` field: "no brain context available". 
+ +## Handoff format + +Structure your output so the next worker in a chain can consume it without transformation. Use the standard result schema. Do not add extra fields. + +## Session logging + +The Go skill handler records your invocation in the session log automatically. You do not need to do this yourself. +``` + +- [ ] **Step 2: Write retrospective.md** + +````markdown + +# Retrospective Worker Discipline + +You are the retrospective worker. Your job is to review a completed coding session and identify knowledge worth preserving in the hyperguild brain. + +## What you receive + +- A session log in JSON format listing every skill invocation: what was attempted, what failed, what passed, how long it took. + +## What you produce + +For each significant learning, call brain_write with a structured markdown note. Then return a JSON result summarising what you wrote. + +## What is worth preserving + +- Patterns that worked and should be repeated +- Failures that revealed something non-obvious about the codebase or the discipline +- Decisions made during the session (architectural, structural, tooling) +- Anything that contradicts or extends what the brain already knows + +## What is NOT worth preserving + +- Routine TDD cycles with no surprises +- Single-attempt passes with no interesting context +- Mechanical operations (file moves, renames, formatting) + +## Output format + +Return JSON matching the standard result schema: + +```json +{ + "status": "pass", + "phase": "retrospective", + "skill": "retrospective", + "verified": true, + "message": "wrote N entries to brain/raw/" +} +``` + +`verified` is true when you successfully called brain_write at least once and received a confirmation. If the session had nothing worth writing, return `verified: true` with `message: "no novel learnings in this session"`. 
+```` + +- [ ] **Step 3: Create brain directory structure** + +```bash +mkdir -p brain/wiki/concepts brain/wiki/entities brain/wiki/sources +mkdir -p brain/raw brain/sessions +mkdir -p brain/training-data/sft brain/training-data/dpo brain/training-data/rl +touch brain/wiki/concepts/.gitkeep brain/wiki/entities/.gitkeep brain/wiki/sources/.gitkeep +touch brain/raw/.gitkeep brain/sessions/.gitkeep +touch brain/training-data/sft/.gitkeep brain/training-data/dpo/.gitkeep brain/training-data/rl/.gitkeep +``` + +- [ ] **Step 4: Add brain/ to .gitignore exceptions** + +Edit `.gitignore` — add after the existing Binaries section: + +```gitignore +# Brain content — keep wiki and structure, exclude session logs and training data +brain/sessions/*.jsonl +brain/training-data/**/*.jsonl +``` + +- [ ] **Step 5: Commit** + +```bash +git add config/supervisor/protocols.md config/supervisor/retrospective.md brain/ +git commit -m "feat: add protocols.md, retrospective discipline, and brain directory structure" +``` + +--- + +## Task 10: update config and wire new skills in main.go + +**Files:** +- Modify: `internal/config/config.go` +- Modify: `cmd/supervisor/main.go` +- Modify: `config/models.yaml` +- Modify: `.env.example` + +- [ ] **Step 1: Extend config.go** + +Add to the `Config` struct and `Load()` in `internal/config/config.go`: + +```go +// Add to Config struct: +IngestBaseURL string // INGEST_BASE_URL, default http://localhost:3300 +SessionsDir string // SUPERVISOR_SESSIONS_DIR, default ./brain/sessions +BrainDir string // SUPERVISOR_BRAIN_DIR, default ./brain + +// Add to Load(): +cfg.IngestBaseURL = envOr("INGEST_BASE_URL", "http://localhost:3300") +cfg.SessionsDir = envOr("SUPERVISOR_SESSIONS_DIR", "./brain/sessions") +cfg.BrainDir = envOr("SUPERVISOR_BRAIN_DIR", "./brain") +``` + +- [ ] **Step 2: Update config_test.go to cover new fields** + +Add to `TestLoad_Defaults` in `internal/config/config_test.go`: + +```go +assert.Equal(t, "http://localhost:3300", 
cfg.IngestBaseURL) +assert.Equal(t, "./brain/sessions", cfg.SessionsDir) +assert.Equal(t, "./brain", cfg.BrainDir) +``` + +Run: +```bash +go test ./internal/config/... -v 2>&1 +``` +Expected: PASS. + +- [ ] **Step 3: Add retrospective model to config/models.yaml** + +```yaml +default: ollama/qwen3-coder-30b-tuned + +skills: + tdd: ollama/qwen3-coder-30b-tuned + review: ollama/devstral-tuned + debug: ollama/deepseek-r1-tuned + retrospective: ollama/qwen3-coder-30b-tuned + trainer: ollama/qwen3-coder-30b-tuned +``` + +- [ ] **Step 4: Rewrite cmd/supervisor/main.go** + +```go +// cmd/supervisor/main.go +package main + +import ( + "context" + "log/slog" + "net/http" + "os" + + "github.com/mathiasbq/supervisor/internal/config" + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/mcp" + "github.com/mathiasbq/supervisor/internal/registry" + "github.com/mathiasbq/supervisor/internal/skills/brain" + "github.com/mathiasbq/supervisor/internal/skills/org" + "github.com/mathiasbq/supervisor/internal/skills/retrospective" + "github.com/mathiasbq/supervisor/internal/skills/sessionlog" + "github.com/mathiasbq/supervisor/internal/skills/tdd" + "github.com/mathiasbq/supervisor/internal/tier" +) + +func main() { + logger := slog.New(slog.NewJSONHandler(os.Stdout, nil)) + + cfg, err := config.Load() + if err != nil { + logger.Error("load config", "err", err) + os.Exit(1) + } + + models, err := config.LoadModels(cfg.ModelsFile) + if err != nil { + logger.Error("load models", "err", err) + os.Exit(1) + } + + systemPrompt, err := os.ReadFile(cfg.ConfigDir + "/CLAUDE.md") + if err != nil { + logger.Error("read supervisor CLAUDE.md", "err", err) + os.Exit(1) + } + + tddPrompt, err := os.ReadFile(cfg.ConfigDir + "/tdd.md") + if err != nil { + logger.Error("read tdd.md", "err", err) + os.Exit(1) + } + + retroPrompt, err := os.ReadFile(cfg.ConfigDir + "/retrospective.md") + if err != nil { + logger.Error("read retrospective.md", "err", err) + 
os.Exit(1) + } + + executor := iexec.New(iexec.Config{ + SystemPrompt: string(systemPrompt), + LiteLLMBaseURL: cfg.LiteLLMBaseURL, + LiteLLMAPIKey: cfg.LiteLLMAPIKey, + }) + + tierFn := func(ctx context.Context) tier.Info { + return tier.Detect(ctx, "https://api.anthropic.com", cfg.LiteLLMBaseURL) + } + + reg := registry.New() + reg.Register(tdd.New(tdd.Config{ + SkillPrompt: string(tddPrompt), + DefaultModel: models.Resolve("tdd", ""), + ExecutorFn: executor.Run, + })) + reg.Register(brain.New(brain.Config{ + IngestBaseURL: cfg.IngestBaseURL, + })) + reg.Register(org.New(org.Config{ + TierFn: tierFn, + })) + reg.Register(sessionlog.New(sessionlog.Config{ + SessionsDir: cfg.SessionsDir, + })) + reg.Register(retrospective.New(retrospective.Config{ + SkillPrompt: string(retroPrompt), + DefaultModel: models.Resolve("retrospective", ""), + SessionsDir: cfg.SessionsDir, + ExecutorFn: executor.Run, + })) + + srv := mcp.NewServer(reg) + mux := http.NewServeMux() + mux.Handle("/mcp", srv) + + addr := ":" + cfg.Port + logger.Info("supervisor starting", "addr", addr) + if err := http.ListenAndServe(addr, mux); err != nil { + logger.Error("server stopped", "err", err) + os.Exit(1) + } +} +``` + +- [ ] **Step 5: Update .env.example** + +Add to `.env.example`: +```bash +# Ingestion server +INGEST_BASE_URL=http://localhost:3300 +INGEST_PORT=3300 +INGEST_BRAIN_DIR=./brain + +# Brain directories +SUPERVISOR_SESSIONS_DIR=./brain/sessions +SUPERVISOR_BRAIN_DIR=./brain +``` + +- [ ] **Step 6: Build to verify no compile errors** + +```bash +go build ./... 2>&1 +``` +Expected: clean. + +- [ ] **Step 7: Run all tests** + +```bash +go test ./... 2>&1 +``` +Expected: all PASS. 
+ +- [ ] **Step 8: Commit** + +```bash +git add internal/config/ cmd/supervisor/main.go config/models.yaml .env.example +git commit -m "feat: wire brain, org, sessionlog, retrospective skills into supervisor" +``` + +--- + +## Task 11: Taskfile and MCP registration + +**Files:** +- Modify: `Taskfile.yml` +- Modify: `.context/mcp.json` + +- [ ] **Step 1: Add ingestion server tasks to Taskfile.yml** + +Add the following tasks to `Taskfile.yml`: + +```yaml + ingestion:build: + desc: Build ingestion server binary + cmds: + - go build -o bin/ingestion-server ./cmd/server + dir: ingestion + + ingestion:dev: + desc: Run ingestion server in development mode + env: + INGEST_BRAIN_DIR: "{{.ROOT_DIR}}/brain" + INGEST_PORT: "3300" + cmds: + - go run ./cmd/server + dir: ingestion + + ingestion:test: + desc: Run ingestion tests + cmds: + - go test ./... -v + dir: ingestion + + dev:all: + desc: Start both supervisor and ingestion server (requires two terminals) + cmds: + - echo "Start ingestion: task ingestion:dev" + - echo "Start supervisor: task supervisor:dev" +``` + +- [ ] **Step 2: Update .context/mcp.json** + +Update `.context/mcp.json` to reflect the expanded tool set: + +```json +{ + "mcpServers": { + "knowledge": { + "url": "http://localhost:3100/mcp" + }, + "supervisor": { + "url": "http://localhost:3200/mcp", + "description": "Hyperguild SDO — skill workers (tdd, retrospective), brain tools (brain_query, brain_write), session logging, tier detection" + } + } +} +``` + +- [ ] **Step 3: Commit** + +```bash +git add Taskfile.yml .context/mcp.json +git commit -m "chore: add ingestion server tasks and update MCP registration" +``` + +--- + +## Task 12: Integration smoke test + +Verify the full Phase 1 system works end-to-end. + +- [ ] **Step 1: Start the ingestion server** + +```bash +(cd ingestion && INGEST_BRAIN_DIR=../brain INGEST_PORT=3300 go run ./cmd/server) & +sleep 1 +curl -s http://localhost:3300/query -d '{"query":"test"}' -H "Content-Type: application/json" | jq . 
+``` + +Expected: `{"results": []}` (brain is empty — that's correct). + +- [ ] **Step 2: Write a note to the brain** + +```bash +curl -s -X POST http://localhost:3300/write \ + -H "Content-Type: application/json" \ + -d '{"content": "# TDD Pattern\n\nAlways write the failing test first.", "filename": "tdd-pattern-test.md"}' | jq . +``` + +Expected: `{"path": "raw/tdd-pattern-test.md"}`. + +- [ ] **Step 3: Start the supervisor** + +```bash +SUPERVISOR_CONFIG_DIR=./config/supervisor \ +SUPERVISOR_MODELS_FILE=./config/models.yaml \ +SUPERVISOR_SESSIONS_DIR=./brain/sessions \ +SUPERVISOR_BRAIN_DIR=./brain \ +INGEST_BASE_URL=http://localhost:3300 \ +go run ./cmd/supervisor/ & +sleep 1 +``` + +- [ ] **Step 4: Verify tools/list includes all new tools** + +```bash +curl -s -X POST http://localhost:3200/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/list","params":{}}' | jq '.result.tools[].name' +``` + +Expected output includes: +``` +"tdd_red" +"tdd_green" +"tdd_refactor" +"brain_query" +"brain_write" +"tier" +"session_log" +"retrospective" +``` + +- [ ] **Step 5: Call tier tool** + +```bash +curl -s -X POST http://localhost:3200/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"tier","arguments":{}}}' | jq . +``` + +Expected: valid tier response with `tier`, `label`, `managed_agents` fields. + +- [ ] **Step 6: Call brain_query** + +```bash +curl -s -X POST http://localhost:3200/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":3,"method":"tools/call","params":{"name":"brain_query","arguments":{"query":"TDD failing test"}}}' | jq . +``` + +Expected: results array containing the tdd-pattern-test.md note written in Step 2. 
+ +- [ ] **Step 7: Call session_log** + +```bash +curl -s -X POST http://localhost:3200/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":4,"method":"tools/call","params":{"name":"session_log","arguments":{"session_id":"smoke-test","skill":"tdd_green","final_status":"pass","model_used":"test","duration_ms":1000}}}' | jq . +``` + +Expected: `{"status":"ok","session_id":"smoke-test"}`. + +Verify file: `cat brain/sessions/smoke-test.jsonl` — should contain one JSON line. + +- [ ] **Step 8: Stop servers and commit** + +```bash +pkill -f "go run ./cmd/server" 2>/dev/null +pkill -f "go run ./cmd/supervisor" 2>/dev/null +git add -A +git commit -m "test: phase 1 integration smoke test passing" +``` + +--- + +## Success Criteria + +- [ ] `go test ./...` passes in supervisor module +- [ ] `go test ./...` passes in ingestion module +- [ ] `tools/list` returns 8 tools: tdd_red, tdd_green, tdd_refactor, brain_query, brain_write, tier, session_log, retrospective +- [ ] `brain_query` returns results after a note is written via `brain_write` +- [ ] `session_log` appends JSONL entries to `brain/sessions/` +- [ ] `tier` returns a valid JSON response with tier/label/managed_agents fields +- [ ] `brain/training-data/` directory structure exists diff --git a/docs/superpowers/plans/2026-04-19-hyperguild-phase2.md b/docs/superpowers/plans/2026-04-19-hyperguild-phase2.md new file mode 100644 index 0000000..24748bb --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-hyperguild-phase2.md @@ -0,0 +1,1871 @@ +# Hyperguild Phase 2 Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add four new MCP skills (review, debug, spec, trainer) to the hyperguild supervisor, with automatic session history injection into all multi-phase skill workers. 
+ +**Architecture:** The supervisor reads prior session entries and injects them into each worker's task prompt when `session_id` is provided — the orchestrator no longer re-summarises history. The `trainer` skill runs a two-step sub-agent chain: a reader agent identifies learning moments from the session log, then a writer agent formats them as SFT/DPO pairs and writes to `brain/training-data/`. All other new skills are single-worker. + +**Tech Stack:** Go 1.26, `internal/session` (JSONL log), `internal/exec` (claude subprocess executor), `internal/registry` (MCP tool registry), `config/supervisor/*.md` (discipline files), `config/models.yaml` (model routing) + +--- + +## File Map + +### New files +| File | Responsibility | +|------|---------------| +| `internal/session/history.go` | `FormatHistory(entries, excludePhase)` — formats prior session entries as a prompt block | +| `internal/session/history_test.go` | Unit tests for FormatHistory | +| `internal/skills/review/skill.go` | review skill: Config, Skill, New, Name, Tools | +| `internal/skills/review/handlers.go` | `Handle`, `handleReview` — single-phase code review worker | +| `internal/skills/review/handlers_test.go` | review handler unit tests | +| `internal/skills/debug/skill.go` | debug skill | +| `internal/skills/debug/handlers.go` | `handleDebug` — hypothesis generation worker | +| `internal/skills/debug/handlers_test.go` | debug handler unit tests | +| `internal/skills/spec/skill.go` | spec skill | +| `internal/skills/spec/handlers.go` | `handleSpec` — spec writing worker | +| `internal/skills/spec/handlers_test.go` | spec handler unit tests | +| `internal/skills/trainer/skill.go` | trainer skill: Config with ReaderPrompt + WriterPrompt + BrainDir | +| `internal/skills/trainer/handlers.go` | `handleTrain` — calls ExecutorFn twice: reader then writer | +| `internal/skills/trainer/handlers_test.go` | trainer handler unit tests (verifies two-call chain) | +| `config/supervisor/review.md` | Review worker 
discipline file | +| `config/supervisor/debug.md` | Debug worker discipline file | +| `config/supervisor/spec.md` | Spec worker discipline file | +| `config/supervisor/trainer-reader.md` | Trainer reader agent discipline | +| `config/supervisor/trainer-writer.md` | Trainer writer agent discipline | + +### Modified files +| File | Change | +|------|--------| +| `internal/exec/result.go` | Add review/debug/spec/trainer to `validPhases` and Schema enum | +| `internal/skills/tdd/skill.go` | Add `SessionsDir string` to Config; add `session_id` to green/refactor tool schemas | +| `internal/skills/tdd/handlers.go` | Add `session_id` to `greenArgs` and `refactorArgs`; inject history when `session_id` non-empty | +| `internal/skills/tdd/handlers_test.go` | Add tests for history injection in green and refactor | +| `cmd/supervisor/main.go` | Pass `SessionsDir` to tdd.Config; read discipline files and register 4 new skills | +| `config/models.yaml` | Add `trainer` model entry | + +--- + +## Task 1: Session history utility + +**Files:** +- Create: `internal/session/history.go` +- Create: `internal/session/history_test.go` + +- [ ] **Step 1: Write the failing test** + +```go +// internal/session/history_test.go +package session_test + +import ( + "testing" + "time" + + "github.com/mathiasbq/supervisor/internal/session" + "github.com/stretchr/testify/assert" +) + +func TestFormatHistoryEmpty(t *testing.T) { + result := session.FormatHistory(nil, "") + assert.Equal(t, "", result) +} + +func TestFormatHistoryFormatsEntries(t *testing.T) { + entries := []session.Entry{ + { + Skill: "tdd", Phase: "red", FinalStatus: "pass", + FilePath: "internal/foo/foo_test.go", + Message: "wrote failing test for Foo", + Timestamp: time.Now(), + }, + } + result := session.FormatHistory(entries, "") + assert.Contains(t, result, "## Session history") + assert.Contains(t, result, "Phase: red") + assert.Contains(t, result, "wrote failing test for Foo") + assert.Contains(t, result, 
"internal/foo/foo_test.go") +} + +func TestFormatHistoryExcludesCurrentPhase(t *testing.T) { + entries := []session.Entry{ + {Skill: "tdd", Phase: "red", Message: "red done", FinalStatus: "pass"}, + {Skill: "tdd", Phase: "green", Message: "green done", FinalStatus: "pass"}, + } + result := session.FormatHistory(entries, "green") + assert.Contains(t, result, "red done") + assert.NotContains(t, result, "green done") +} +``` + +- [ ] **Step 2: Run test to confirm it fails** + +```bash +cd /path/to/supervisor +go test ./internal/session/... -run TestFormatHistory -v +``` +Expected: `FAIL` — `session.FormatHistory undefined` + +- [ ] **Step 3: Implement FormatHistory** + +```go +// internal/session/history.go +package session + +import ( + "fmt" + "strings" +) + +// FormatHistory formats prior session entries as a structured block for +// injection into a worker task prompt. Entries matching excludePhase are +// omitted (pass the current phase to avoid circular injection). +func FormatHistory(entries []Entry, excludePhase string) string { + var filtered []Entry + for _, e := range entries { + if e.Phase != excludePhase { + filtered = append(filtered, e) + } + } + if len(filtered) == 0 { + return "" + } + + var b strings.Builder + b.WriteString("## Session history\n\n") + for _, e := range filtered { + fmt.Fprintf(&b, "### Phase: %s\n", e.Phase) + fmt.Fprintf(&b, "- Skill: %s\n", e.Skill) + fmt.Fprintf(&b, "- Status: %s\n", e.FinalStatus) + if e.FilePath != "" { + fmt.Fprintf(&b, "- File: %s\n", e.FilePath) + } + if e.Message != "" { + fmt.Fprintf(&b, "- Summary: %s\n", e.Message) + } + b.WriteString("\n") + } + return b.String() +} +``` + +- [ ] **Step 4: Run tests to confirm they pass** + +```bash +go test ./internal/session/... 
-v +``` +Expected: all `TestFormatHistory*` tests PASS + +- [ ] **Step 5: Commit** + +```bash +git add internal/session/history.go internal/session/history_test.go +git commit -m "feat(session): add FormatHistory for worker context injection" +``` + +--- + +## Task 2: Fix Schema enum and validPhases + +**Files:** +- Modify: `internal/exec/result.go` + +The Schema's `phase` enum currently only lists `["red","green","refactor"]`. Workers using any other phase name get forced to pick from that list (the retrospective worker was returning `"refactor"` as a result). Fix by removing the enum constraint from the schema (let it be any string) and rely solely on `validPhases` for server-side validation. + +- [ ] **Step 1: Write the failing test** + +```go +// Add to internal/exec/result_test.go (check existing file first for test helpers) +func TestValidateAcceptsAllPhases(t *testing.T) { + phases := []string{"red", "green", "refactor", "retrospective", "review", "debug", "spec", "trainer"} + for _, phase := range phases { + r := exec.Result{Status: "pass", Phase: phase, Skill: "test", ModelUsed: "self", Message: "ok"} + assert.NoError(t, r.Validate(), "phase %q should be valid", phase) + } +} +``` + +- [ ] **Step 2: Run test to confirm it fails** + +```bash +go test ./internal/exec/... 
-run TestValidateAcceptsAllPhases -v +``` +Expected: FAIL — `"review"`, `"debug"`, `"spec"`, `"trainer"` fail validation + +- [ ] **Step 3: Update result.go** + +In `internal/exec/result.go`, make these two changes: + +**Change 1** — update `validPhases`: +```go +var validPhases = map[string]bool{ + "red": true, + "green": true, + "refactor": true, + "retrospective": true, + "review": true, + "debug": true, + "spec": true, + "trainer": true, +} +``` + +**Change 2** — remove the enum constraint from the Schema `phase` property (replace the `"phase"` line): +```go +// Before: +"phase": {"type": "string", "enum": ["red","green","refactor"]}, + +// After: +"phase": {"type": "string"}, +``` + +Also fix the error message in `Validate()`: +```go +// Before: +errs = append(errs, "phase must be red|green|refactor, got: "+r.Phase) + +// After: +errs = append(errs, "phase must be one of red|green|refactor|retrospective|review|debug|spec|trainer, got: "+r.Phase) +``` + +- [ ] **Step 4: Run tests to confirm they pass** + +```bash +go test ./internal/exec/... 
-v +``` +Expected: all tests PASS, including `TestValidateAcceptsAllPhases` + +- [ ] **Step 5: Commit** + +```bash +git add internal/exec/result.go +git commit -m "fix(exec): expand validPhases and remove schema enum constraint for phase" +``` + +--- + +## Task 3: Session history injection in TDD green and refactor + +**Files:** +- Modify: `internal/skills/tdd/skill.go` +- Modify: `internal/skills/tdd/handlers.go` +- Modify: `internal/skills/tdd/handlers_test.go` +- Modify: `cmd/supervisor/main.go` + +- [ ] **Step 1: Write the failing tests** + +Add to `internal/skills/tdd/handlers_test.go`: + +```go +func TestTDDGreenInjectsSessionHistory(t *testing.T) { + sessDir := t.TempDir() + require.NoError(t, session.Append(sessDir, "sess-1", session.Entry{ + SessionID: "sess-1", Skill: "tdd", Phase: "red", FinalStatus: "pass", + FilePath: "internal/foo/foo_test.go", + Message: "wrote failing test for Foo", + })) + + var capturedPrompt string + fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) { + capturedPrompt = req.TaskPrompt + return iexec.Result{Status: "pass", Phase: "green", Skill: "tdd", Verified: true, ModelUsed: "self", Message: "ok"}, nil + } + + sk := tdd.New(tdd.Config{SkillPrompt: "tdd", ExecutorFn: fakeFn, SessionsDir: sessDir}) + _, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage( + `{"project_root":"/tmp","test_path":"internal/foo/foo_test.go","test_cmd":"go test ./...","session_id":"sess-1"}`, + )) + require.NoError(t, err) + assert.Contains(t, capturedPrompt, "## Session history") + assert.Contains(t, capturedPrompt, "wrote failing test for Foo") +} + +func TestTDDGreenNoHistoryWhenSessionIDEmpty(t *testing.T) { + var capturedPrompt string + fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) { + capturedPrompt = req.TaskPrompt + return iexec.Result{Status: "pass", Phase: "green", Skill: "tdd", Verified: true, ModelUsed: "self", Message: "ok"}, nil + } + + sk := 
tdd.New(tdd.Config{SkillPrompt: "tdd", ExecutorFn: fakeFn, SessionsDir: t.TempDir()}) + _, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage( + `{"project_root":"/tmp","test_path":"internal/foo/foo_test.go"}`, + )) + require.NoError(t, err) + assert.NotContains(t, capturedPrompt, "## Session history") +} +``` + +You will need these imports in the test file: +```go +import ( + "github.com/mathiasbq/supervisor/internal/session" + iexec "github.com/mathiasbq/supervisor/internal/exec" +) +``` + +- [ ] **Step 2: Run tests to confirm they fail** + +```bash +go test ./internal/skills/tdd/... -run TestTDDGreen -v +``` +Expected: FAIL — `tdd.Config` has no `SessionsDir` field, `session_id` not in args + +- [ ] **Step 3: Add SessionsDir to Config and session_id to tool schemas** + +In `internal/skills/tdd/skill.go`, add `SessionsDir` to Config: +```go +type Config struct { + SystemPrompt string + SkillPrompt string + ExecutorFn ExecutorFn + DefaultModel string + SessionsDir string // optional: path to brain/sessions/ for history injection +} +``` + +In `Tools()`, add `"session_id"` as an optional property to `tdd_green` and `tdd_refactor` input schemas: +```go +// tdd_green InputSchema: +InputSchema: schema( + []string{"project_root", "test_path"}, + map[string]any{ + "project_root": strProp, + "test_path": strProp, + "model": strProp, + "test_cmd": strProp, + "session_id": strProp, + }, +), +// tdd_refactor InputSchema (same addition): +InputSchema: schema( + []string{"project_root", "test_path", "impl_path"}, + map[string]any{ + "project_root": strProp, + "test_path": strProp, + "impl_path": strProp, + "model": strProp, + "test_cmd": strProp, + "session_id": strProp, + }, +), +``` + +- [ ] **Step 4: Update greenArgs, refactorArgs, and inject history** + +In `internal/skills/tdd/handlers.go`, add imports and update structs + handlers: + +```go +import ( + // existing imports... 
+ "github.com/mathiasbq/supervisor/internal/session" +) + +type greenArgs struct { + ProjectRoot string `json:"project_root"` + TestPath string `json:"test_path"` + Model string `json:"model"` + TestCmd string `json:"test_cmd"` + SessionID string `json:"session_id"` +} + +func (s *Skill) handleGreen(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) { + var args greenArgs + if err := json.Unmarshal(raw, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if args.ProjectRoot == "" { + return nil, fmt.Errorf("project_root is required") + } + if args.TestPath == "" { + return nil, fmt.Errorf("test_path is required") + } + + task := fmt.Sprintf( + "phase: green\nproject_root: %s\ntest_path: %s\nmodel: %s\ntest_cmd: %s", + args.ProjectRoot, args.TestPath, s.resolveModel(args.Model), args.TestCmd, + ) + task = s.prependHistory(args.SessionID, "green", task) + return s.execute(ctx, task) +} + +type refactorArgs struct { + ProjectRoot string `json:"project_root"` + TestPath string `json:"test_path"` + ImplPath string `json:"impl_path"` + Model string `json:"model"` + TestCmd string `json:"test_cmd"` + SessionID string `json:"session_id"` +} + +func (s *Skill) handleRefactor(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) { + var args refactorArgs + if err := json.Unmarshal(raw, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if args.ProjectRoot == "" { + return nil, fmt.Errorf("project_root is required") + } + if args.TestPath == "" { + return nil, fmt.Errorf("test_path is required") + } + if args.ImplPath == "" { + return nil, fmt.Errorf("impl_path is required") + } + + task := fmt.Sprintf( + "phase: refactor\nproject_root: %s\ntest_path: %s\nimpl_path: %s\nmodel: %s\ntest_cmd: %s", + args.ProjectRoot, args.TestPath, args.ImplPath, s.resolveModel(args.Model), args.TestCmd, + ) + task = s.prependHistory(args.SessionID, "refactor", task) + return s.execute(ctx, task) +} + +// prependHistory 
reads the session log and prepends prior phase entries to the task prompt. +// If sessionID is empty or SessionsDir is not configured, task is returned unchanged. +func (s *Skill) prependHistory(sessionID, currentPhase, task string) string { + if sessionID == "" || s.cfg.SessionsDir == "" { + return task + } + entries, err := session.Read(s.cfg.SessionsDir, sessionID) + if err != nil || len(entries) == 0 { + return task + } + history := session.FormatHistory(entries, currentPhase) + if history == "" { + return task + } + return history + "\n---\n\n" + task +} +``` + +- [ ] **Step 5: Update main.go to pass SessionsDir to tdd.Config** + +In `cmd/supervisor/main.go`, find the `tdd.New(tdd.Config{...})` call and add `SessionsDir`: +```go +reg.Register(tdd.New(tdd.Config{ + SystemPrompt: string(systemPrompt), + SkillPrompt: string(tddPrompt), + DefaultModel: models.Resolve("tdd", ""), + ExecutorFn: executor.Run, + SessionsDir: cfg.SessionsDir, // ← add this line +})) +``` + +- [ ] **Step 6: Run all tests** + +```bash +go test ./... 
-race -count=1 +``` +Expected: all tests PASS + +- [ ] **Step 7: Commit** + +```bash +git add internal/skills/tdd/skill.go internal/skills/tdd/handlers.go internal/skills/tdd/handlers_test.go cmd/supervisor/main.go +git commit -m "feat(tdd): inject session history into green and refactor worker prompts" +``` + +--- + +## Task 4: review skill + +**Files:** +- Create: `internal/skills/review/skill.go` +- Create: `internal/skills/review/handlers.go` +- Create: `internal/skills/review/handlers_test.go` +- Create: `config/supervisor/review.md` +- Modify: `cmd/supervisor/main.go` + +- [ ] **Step 1: Write the failing test** + +```go +// internal/skills/review/handlers_test.go +package review_test + +import ( + "context" + "encoding/json" + "testing" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/skills/review" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestReviewToolRegistered(t *testing.T) { + sk := review.New(review.Config{SkillPrompt: "review rules"}) + names := make([]string, 0) + for _, tool := range sk.Tools() { + names = append(names, tool.Name) + } + assert.Contains(t, names, "review") +} + +func TestReviewRequiresProjectRoot(t *testing.T) { + sk := review.New(review.Config{SkillPrompt: "r"}) + _, err := sk.Handle(context.Background(), "review", json.RawMessage(`{"files":["main.go"]}`)) + assert.ErrorContains(t, err, "project_root") +} + +func TestReviewRequiresFiles(t *testing.T) { + sk := review.New(review.Config{SkillPrompt: "r"}) + _, err := sk.Handle(context.Background(), "review", json.RawMessage(`{"project_root":"/tmp"}`)) + assert.ErrorContains(t, err, "files") +} + +func TestReviewCallsExecutor(t *testing.T) { + called := false + var capturedTask string + fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) { + called = true + capturedTask = req.TaskPrompt + return iexec.Result{ + Status: "pass", Phase: "review", Skill: "review", + 
Verified: true, ModelUsed: "self", Message: "2 warnings found", + }, nil + } + + sk := review.New(review.Config{SkillPrompt: "review rules", ExecutorFn: fakeFn, SessionsDir: t.TempDir()}) + out, err := sk.Handle(context.Background(), "review", json.RawMessage( + `{"project_root":"/tmp/proj","files":["internal/foo/foo.go"],"context":"PR: add Foo helper"}`, + )) + require.NoError(t, err) + assert.True(t, called) + assert.Contains(t, capturedTask, "internal/foo/foo.go") + assert.Contains(t, capturedTask, "PR: add Foo helper") + + var result iexec.Result + require.NoError(t, json.Unmarshal(out, &result)) + assert.Equal(t, "pass", result.Status) + assert.Equal(t, "review", result.Phase) +} + +var _ = require.New +``` + +- [ ] **Step 2: Run test to confirm it fails** + +```bash +go test ./internal/skills/review/... -v +``` +Expected: FAIL — package `review` does not exist + +- [ ] **Step 3: Create skill.go** + +```go +// internal/skills/review/skill.go +package review + +import ( + "context" + "encoding/json" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/registry" +) + +type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error) + +type Config struct { + SkillPrompt string + DefaultModel string + ExecutorFn ExecutorFn + SessionsDir string +} + +type Skill struct{ cfg Config } + +func New(cfg Config) *Skill { return &Skill{cfg: cfg} } + +func (s *Skill) Name() string { return "review" } + +func (s *Skill) Tools() []registry.ToolDef { + schema := func(required []string, props map[string]any) json.RawMessage { + b, _ := json.Marshal(map[string]any{"type": "object", "required": required, "properties": props}) + return b + } + return []registry.ToolDef{ + { + Name: "review", + Description: "Perform a structured code review of the specified files. 
Returns findings with severity levels.", + InputSchema: schema( + []string{"project_root", "files"}, + map[string]any{ + "project_root": map[string]any{"type": "string"}, + "files": map[string]any{"type": "array", "items": map[string]any{"type": "string"}}, + "context": map[string]any{"type": "string"}, + "model": map[string]any{"type": "string"}, + "session_id": map[string]any{"type": "string"}, + }, + ), + }, + } +} +``` + +- [ ] **Step 4: Create handlers.go** + +```go +// internal/skills/review/handlers.go +package review + +import ( + "context" + "encoding/json" + "fmt" + "strings" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" +) + +type reviewArgs struct { + ProjectRoot string `json:"project_root"` + Files []string `json:"files"` + Context string `json:"context"` + Model string `json:"model"` + SessionID string `json:"session_id"` +} + +func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "review" { + return nil, fmt.Errorf("unknown tool: %s", tool) + } + var a reviewArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.ProjectRoot == "" { + return nil, fmt.Errorf("project_root is required") + } + if len(a.Files) == 0 { + return nil, fmt.Errorf("files is required") + } + + model := a.Model + if model == "" { + model = s.cfg.DefaultModel + } + + task := fmt.Sprintf( + "phase: review\nproject_root: %s\nfiles: %s\ncontext: %s\nmodel: %s", + a.ProjectRoot, strings.Join(a.Files, ", "), a.Context, model, + ) + task = s.prependHistory(a.SessionID, "review", task) + + if s.cfg.ExecutorFn == nil { + return nil, fmt.Errorf("no executor configured") + } + result, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.SkillPrompt, + TaskPrompt: task, + Model: model, + Tools: "Read,Bash", + }) + if err != nil { + return nil, err + } + b, err := json.Marshal(result) + if err 
!= nil { + return nil, fmt.Errorf("marshal result: %w", err) + } + return b, nil +} + +func (s *Skill) prependHistory(sessionID, currentPhase, task string) string { + if sessionID == "" || s.cfg.SessionsDir == "" { + return task + } + entries, err := session.Read(s.cfg.SessionsDir, sessionID) + if err != nil || len(entries) == 0 { + return task + } + history := session.FormatHistory(entries, currentPhase) + if history == "" { + return task + } + return history + "\n---\n\n" + task +} +``` + +- [ ] **Step 5: Create config/supervisor/review.md** + +```markdown +# Code Review Discipline + +You are a disciplined code reviewer. Read files carefully before commenting. + +## Iron laws +1. Never approve security vulnerabilities: command injection, SQL injection, credential exposure, path traversal, unchecked input at system boundaries +2. Never approve silently swallowed errors — `err != nil` without wrapping or handling is always wrong +3. Never approve missing validation at system boundaries (user input, external APIs, file reads) + +## Output contract +Return JSON result with: +- `status`: "pass" if no blocking issues; "fail" if any iron law is violated +- `phase`: "review" +- `skill`: "review" +- `file_path`: first file reviewed +- `runner_output`: full review formatted as: + ``` + CRITICAL: <issue> at <file>:<line> + WARNING: <issue> at <file>:<line> + SUGGESTION: <issue> at <file>:<line> + ``` +- `verified`: true if you read all specified files; false if any were missing or unreadable +- `message`: "N critical, M warnings, K suggestions" or "clean: <iron-law checks run>" + +## Rules +1. Read every file listed before writing feedback +2. Check iron laws first — any violation is CRITICAL and sets status to "fail" +3. Then check: correctness, test coverage for new code, Go style conventions +4. Never rubber-stamp — if nothing is wrong, explain specifically which iron law checks you ran and why they passed +5. 
Line references are required for every finding — "roughly around the middle" is not acceptable +``` + +- [ ] **Step 6: Wire into main.go** + +In `cmd/supervisor/main.go`, add after existing `os.ReadFile` calls for discipline files: +```go +reviewPrompt, err := os.ReadFile(cfg.ConfigDir + "/review.md") +if err != nil { + logger.Error("read review.md", "path", cfg.ConfigDir+"/review.md", "err", err) + os.Exit(1) +} +``` + +Add import: `"github.com/mathiasbq/supervisor/internal/skills/review"` + +Register after existing `reg.Register` calls: +```go +reg.Register(review.New(review.Config{ + SkillPrompt: string(reviewPrompt), + DefaultModel: models.Resolve("review", ""), + ExecutorFn: executor.Run, + SessionsDir: cfg.SessionsDir, +})) +``` + +- [ ] **Step 7: Run all tests** + +```bash +go test ./... -race -count=1 +``` +Expected: all tests PASS + +- [ ] **Step 8: Commit** + +```bash +git add internal/skills/review/ config/supervisor/review.md cmd/supervisor/main.go +git commit -m "feat(review): add code review MCP skill with session history injection" +``` + +--- + +## Task 5: debug skill + +**Files:** +- Create: `internal/skills/debug/skill.go` +- Create: `internal/skills/debug/handlers.go` +- Create: `internal/skills/debug/handlers_test.go` +- Create: `config/supervisor/debug.md` +- Modify: `cmd/supervisor/main.go` + +- [ ] **Step 1: Write the failing test** + +```go +// internal/skills/debug/handlers_test.go +package debug_test + +import ( + "context" + "encoding/json" + "testing" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/skills/debug" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDebugToolRegistered(t *testing.T) { + sk := debug.New(debug.Config{SkillPrompt: "debug rules"}) + names := make([]string, 0) + for _, tool := range sk.Tools() { + names = append(names, tool.Name) + } + assert.Contains(t, names, "debug") +} + +func TestDebugRequiresProjectRoot(t 
*testing.T) { + sk := debug.New(debug.Config{SkillPrompt: "d"}) + _, err := sk.Handle(context.Background(), "debug", json.RawMessage(`{"error":"panic: nil pointer"}`)) + assert.ErrorContains(t, err, "project_root") +} + +func TestDebugRequiresError(t *testing.T) { + sk := debug.New(debug.Config{SkillPrompt: "d"}) + _, err := sk.Handle(context.Background(), "debug", json.RawMessage(`{"project_root":"/tmp"}`)) + assert.ErrorContains(t, err, "error") +} + +func TestDebugCallsExecutor(t *testing.T) { + called := false + var capturedTask string + fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) { + called = true + capturedTask = req.TaskPrompt + return iexec.Result{ + Status: "pass", Phase: "debug", Skill: "debug", + RunnerOutput: "HYPOTHESIS 1 (likelihood: high): nil map access\nVERIFY: go test ./... → expected: panic line reference", + Verified: false, ModelUsed: "self", Message: "3 hypotheses for: panic nil pointer at foo.go:42", + }, nil + } + + sk := debug.New(debug.Config{SkillPrompt: "debug rules", ExecutorFn: fakeFn, SessionsDir: t.TempDir()}) + out, err := sk.Handle(context.Background(), "debug", json.RawMessage( + `{"project_root":"/tmp/proj","error":"panic: nil pointer dereference at foo.go:42","context":"occurs on startup"}`, + )) + require.NoError(t, err) + assert.True(t, called) + assert.Contains(t, capturedTask, "panic: nil pointer dereference") + assert.Contains(t, capturedTask, "occurs on startup") + + var result iexec.Result + require.NoError(t, json.Unmarshal(out, &result)) + assert.Equal(t, "debug", result.Phase) +} + +var _ = require.New +``` + +- [ ] **Step 2: Run test to confirm it fails** + +```bash +go test ./internal/skills/debug/... 
-v +``` +Expected: FAIL — package does not exist + +- [ ] **Step 3: Create skill.go** + +```go +// internal/skills/debug/skill.go +package debug + +import ( + "context" + "encoding/json" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/registry" +) + +type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error) + +type Config struct { + SkillPrompt string + DefaultModel string + ExecutorFn ExecutorFn + SessionsDir string +} + +type Skill struct{ cfg Config } + +func New(cfg Config) *Skill { return &Skill{cfg: cfg} } + +func (s *Skill) Name() string { return "debug" } + +func (s *Skill) Tools() []registry.ToolDef { + schema := func(required []string, props map[string]any) json.RawMessage { + b, _ := json.Marshal(map[string]any{"type": "object", "required": required, "properties": props}) + return b + } + str := map[string]any{"type": "string"} + return []registry.ToolDef{ + { + Name: "debug", + Description: "Analyse an error and return 3-5 hypotheses ordered by likelihood, each with a concrete verification step.", + InputSchema: schema( + []string{"project_root", "error"}, + map[string]any{ + "project_root": str, + "error": str, + "context": str, + "model": str, + "session_id": str, + }, + ), + }, + } +} +``` + +- [ ] **Step 4: Create handlers.go** + +```go +// internal/skills/debug/handlers.go +package debug + +import ( + "context" + "encoding/json" + "fmt" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" +) + +type debugArgs struct { + ProjectRoot string `json:"project_root"` + Error string `json:"error"` + Context string `json:"context"` + Model string `json:"model"` + SessionID string `json:"session_id"` +} + +func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "debug" { + return nil, fmt.Errorf("unknown tool: %s", tool) + } + var a debugArgs + if err := 
json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.ProjectRoot == "" { + return nil, fmt.Errorf("project_root is required") + } + if a.Error == "" { + return nil, fmt.Errorf("error is required") + } + + model := a.Model + if model == "" { + model = s.cfg.DefaultModel + } + + task := fmt.Sprintf( + "phase: debug\nproject_root: %s\nerror: %s\ncontext: %s\nmodel: %s", + a.ProjectRoot, a.Error, a.Context, model, + ) + task = s.prependHistory(a.SessionID, "debug", task) + + if s.cfg.ExecutorFn == nil { + return nil, fmt.Errorf("no executor configured") + } + result, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.SkillPrompt, + TaskPrompt: task, + Model: model, + Tools: "Read,Bash", + }) + if err != nil { + return nil, err + } + b, err := json.Marshal(result) + if err != nil { + return nil, fmt.Errorf("marshal result: %w", err) + } + return b, nil +} + +func (s *Skill) prependHistory(sessionID, currentPhase, task string) string { + if sessionID == "" || s.cfg.SessionsDir == "" { + return task + } + entries, err := session.Read(s.cfg.SessionsDir, sessionID) + if err != nil || len(entries) == 0 { + return task + } + history := session.FormatHistory(entries, currentPhase) + if history == "" { + return task + } + return history + "\n---\n\n" + task +} +``` + +- [ ] **Step 5: Create config/supervisor/debug.md** + +```markdown +# Debug Discipline + +You are a systematic debugger. Form hypotheses before suggesting fixes. + +## Iron laws +1. Never suggest "try X and see what happens" — every hypothesis must have a specific expected outcome if correct +2. Generate exactly 3-5 hypotheses, ordered by likelihood (most likely first) +3. 
Never fix the bug — diagnose only; the caller decides what to do with the hypotheses + +## Output contract +Return JSON result with: +- `status`: "pass" (hypotheses generated) or "error" (error too ambiguous to analyse) +- `phase`: "debug" +- `skill`: "debug" +- `file_path`: the most relevant file to the error (read it) +- `runner_output`: your hypotheses, formatted as: + ``` + HYPOTHESIS 1 (likelihood: high): <mechanism> + VERIFY: <command> → expected if correct: <expected output> + + HYPOTHESIS 2 (likelihood: medium): <mechanism> + VERIFY: <command> → expected if correct: <expected output> + ``` +- `verified`: false — verification is the caller's job +- `message`: "N hypotheses for: <error summary>" + +## Rules +1. Read the error and any context files provided before forming hypotheses +2. Identify the failure mode first — what actually went wrong, not just what the error says +3. For each hypothesis: name the mechanism, explain why it would produce this exact error, give a concrete verification command with expected output +4. If the error is clearly a typo or trivial mistake, still form 3 hypotheses — surface the most likely cause as #1 +``` + +- [ ] **Step 6: Wire into main.go** + +Add file read: +```go +debugPrompt, err := os.ReadFile(cfg.ConfigDir + "/debug.md") +if err != nil { + logger.Error("read debug.md", "path", cfg.ConfigDir+"/debug.md", "err", err) + os.Exit(1) +} +``` + +Add import: `"github.com/mathiasbq/supervisor/internal/skills/debug"` + +Register skill: +```go +reg.Register(debug.New(debug.Config{ + SkillPrompt: string(debugPrompt), + DefaultModel: models.Resolve("debug", ""), + ExecutorFn: executor.Run, + SessionsDir: cfg.SessionsDir, +})) +``` + +- [ ] **Step 7: Run all tests** + +```bash +go test ./...
-race -count=1 +``` +Expected: all tests PASS + +- [ ] **Step 8: Commit** + +```bash +git add internal/skills/debug/ config/supervisor/debug.md cmd/supervisor/main.go +git commit -m "feat(debug): add debug MCP skill with hypothesis generation" +``` + +--- + +## Task 6: spec skill + +**Files:** +- Create: `internal/skills/spec/skill.go` +- Create: `internal/skills/spec/handlers.go` +- Create: `internal/skills/spec/handlers_test.go` +- Create: `config/supervisor/spec.md` +- Modify: `cmd/supervisor/main.go` + +- [ ] **Step 1: Write the failing test** + +```go +// internal/skills/spec/handlers_test.go +package spec_test + +import ( + "context" + "encoding/json" + "testing" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/skills/spec" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSpecToolRegistered(t *testing.T) { + sk := spec.New(spec.Config{SkillPrompt: "spec rules"}) + names := make([]string, 0) + for _, tool := range sk.Tools() { + names = append(names, tool.Name) + } + assert.Contains(t, names, "spec") +} + +func TestSpecRequiresProjectRoot(t *testing.T) { + sk := spec.New(spec.Config{SkillPrompt: "s"}) + _, err := sk.Handle(context.Background(), "spec", json.RawMessage(`{"requirements":"add login"}`)) + assert.ErrorContains(t, err, "project_root") +} + +func TestSpecRequiresRequirements(t *testing.T) { + sk := spec.New(spec.Config{SkillPrompt: "s"}) + _, err := sk.Handle(context.Background(), "spec", json.RawMessage(`{"project_root":"/tmp"}`)) + assert.ErrorContains(t, err, "requirements") +} + +func TestSpecCallsExecutor(t *testing.T) { + called := false + var capturedTask string + fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) { + called = true + capturedTask = req.TaskPrompt + return iexec.Result{ + Status: "pass", Phase: "spec", Skill: "spec", + FilePath: "/tmp/proj/docs/login-spec.md", + Verified: true, ModelUsed: "self", Message: 
"spec written: login feature", + }, nil + } + + sk := spec.New(spec.Config{SkillPrompt: "spec rules", ExecutorFn: fakeFn, SessionsDir: t.TempDir()}) + out, err := sk.Handle(context.Background(), "spec", json.RawMessage( + `{"project_root":"/tmp/proj","requirements":"add OAuth2 login","output_path":"docs/login-spec.md"}`, + )) + require.NoError(t, err) + assert.True(t, called) + assert.Contains(t, capturedTask, "OAuth2 login") + assert.Contains(t, capturedTask, "docs/login-spec.md") + + var result iexec.Result + require.NoError(t, json.Unmarshal(out, &result)) + assert.Equal(t, "spec", result.Phase) +} + +var _ = require.New +``` + +- [ ] **Step 2: Run test to confirm it fails** + +```bash +go test ./internal/skills/spec/... -v +``` +Expected: FAIL — package does not exist + +- [ ] **Step 3: Create skill.go** + +```go +// internal/skills/spec/skill.go +package spec + +import ( + "context" + "encoding/json" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/registry" +) + +type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error) + +type Config struct { + SkillPrompt string + DefaultModel string + ExecutorFn ExecutorFn + SessionsDir string +} + +type Skill struct{ cfg Config } + +func New(cfg Config) *Skill { return &Skill{cfg: cfg} } + +func (s *Skill) Name() string { return "spec" } + +func (s *Skill) Tools() []registry.ToolDef { + schema := func(required []string, props map[string]any) json.RawMessage { + b, _ := json.Marshal(map[string]any{"type": "object", "required": required, "properties": props}) + return b + } + str := map[string]any{"type": "string"} + return []registry.ToolDef{ + { + Name: "spec", + Description: "Generate a structured implementation spec from requirements. 
Writes the spec to output_path in the project.", + InputSchema: schema( + []string{"project_root", "requirements"}, + map[string]any{ + "project_root": str, + "requirements": str, + "output_path": str, + "context": str, + "model": str, + "session_id": str, + }, + ), + }, + } +} +``` + +- [ ] **Step 4: Create handlers.go** + +```go +// internal/skills/spec/handlers.go +package spec + +import ( + "context" + "encoding/json" + "fmt" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" +) + +type specArgs struct { + ProjectRoot string `json:"project_root"` + Requirements string `json:"requirements"` + OutputPath string `json:"output_path"` + Context string `json:"context"` + Model string `json:"model"` + SessionID string `json:"session_id"` +} + +func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "spec" { + return nil, fmt.Errorf("unknown tool: %s", tool) + } + var a specArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.ProjectRoot == "" { + return nil, fmt.Errorf("project_root is required") + } + if a.Requirements == "" { + return nil, fmt.Errorf("requirements is required") + } + outputPath := a.OutputPath + if outputPath == "" { + outputPath = "docs/spec.md" + } + + model := a.Model + if model == "" { + model = s.cfg.DefaultModel + } + + task := fmt.Sprintf( + "phase: spec\nproject_root: %s\nrequirements: %s\noutput_path: %s\ncontext: %s\nmodel: %s", + a.ProjectRoot, a.Requirements, outputPath, a.Context, model, + ) + task = s.prependHistory(a.SessionID, "spec", task) + + if s.cfg.ExecutorFn == nil { + return nil, fmt.Errorf("no executor configured") + } + result, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.SkillPrompt, + TaskPrompt: task, + Model: model, + Tools: "Read,Write", + }) + if err != nil { + return nil, err + } + b, err := json.Marshal(result) + if 
err != nil { + return nil, fmt.Errorf("marshal result: %w", err) + } + return b, nil +} + +func (s *Skill) prependHistory(sessionID, currentPhase, task string) string { + if sessionID == "" || s.cfg.SessionsDir == "" { + return task + } + entries, err := session.Read(s.cfg.SessionsDir, sessionID) + if err != nil || len(entries) == 0 { + return task + } + history := session.FormatHistory(entries, currentPhase) + if history == "" { + return task + } + return history + "\n---\n\n" + task +} +``` + +- [ ] **Step 5: Create config/supervisor/spec.md** + +```markdown +# Spec Writing Discipline + +You write structured implementation specs. Nothing is left ambiguous. + +## Iron laws +1. Success criteria must be measurable — "the system is fast" is banned; "p99 < 200ms under 100 RPS" is valid +2. Always include an explicit "Out of scope" section — if you don't draw the boundary, the developer will guess wrong +3. Every technical decision in the approach must have a rationale + +## Output contract +Return JSON result with: +- `status`: "pass" (spec written) or "error" (requirements too ambiguous to spec without more input) +- `phase`: "spec" +- `skill`: "spec" +- `file_path`: the output_path where the spec was written (absolute path) +- `runner_output`: "" +- `verified`: true if the file was written successfully +- `message`: "spec written: " + +## Spec structure +Write the spec as markdown to the output_path: + +```markdown +# [Feature] Spec + +## Problem statement +[What problem does this solve? For whom? Why now?] 
+ +## Success criteria +- [ ] [Criterion 1 — measurable and verifiable] +- [ ] [Criterion 2 — measurable and verifiable] + +## Constraints +[Non-negotiable requirements the solution must satisfy] + +## Out of scope +[What we are explicitly NOT doing in this iteration] + +## Technical approach +[Architecture decisions, key components, rationale for each choice] + +## Risks +[What could go wrong, and how we'd mitigate it] +``` + +If the requirements are too vague to produce measurable success criteria, return status "error" with a message listing the specific questions that need answers. +``` + +- [ ] **Step 6: Wire into main.go** + +Add file read: +```go +specPrompt, err := os.ReadFile(cfg.ConfigDir + "/spec.md") +if err != nil { + logger.Error("read spec.md", "path", cfg.ConfigDir+"/spec.md", "err", err) + os.Exit(1) +} +``` + +Add import: `"github.com/mathiasbq/supervisor/internal/skills/spec"` + +Register skill: +```go +reg.Register(spec.New(spec.Config{ + SkillPrompt: string(specPrompt), + DefaultModel: models.Resolve("spec", ""), + ExecutorFn: executor.Run, + SessionsDir: cfg.SessionsDir, +})) +``` + +- [ ] **Step 7: Run all tests** + +```bash +go test ./... 
-race -count=1 +``` +Expected: all tests PASS + +- [ ] **Step 8: Commit** + +```bash +git add internal/skills/spec/ config/supervisor/spec.md cmd/supervisor/main.go +git commit -m "feat(spec): add spec writing MCP skill" +``` + +--- + +## Task 7: trainer skill + +**Files:** +- Create: `internal/skills/trainer/skill.go` +- Create: `internal/skills/trainer/handlers.go` +- Create: `internal/skills/trainer/handlers_test.go` +- Create: `config/supervisor/trainer-reader.md` +- Create: `config/supervisor/trainer-writer.md` +- Modify: `cmd/supervisor/main.go` +- Modify: `config/models.yaml` + +- [ ] **Step 1: Write the failing test** + +```go +// internal/skills/trainer/handlers_test.go +package trainer_test + +import ( + "context" + "encoding/json" + "testing" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" + "github.com/mathiasbq/supervisor/internal/skills/trainer" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTrainerToolRegistered(t *testing.T) { + sk := trainer.New(trainer.Config{ReaderPrompt: "r", WriterPrompt: "w"}) + names := make([]string, 0) + for _, tool := range sk.Tools() { + names = append(names, tool.Name) + } + assert.Contains(t, names, "trainer") +} + +func TestTrainerRequiresSessionID(t *testing.T) { + sk := trainer.New(trainer.Config{ReaderPrompt: "r", WriterPrompt: "w"}) + _, err := sk.Handle(context.Background(), "trainer", json.RawMessage(`{}`)) + assert.ErrorContains(t, err, "session_id") +} + +func TestTrainerCallsReaderThenWriter(t *testing.T) { + sessDir := t.TempDir() + require.NoError(t, session.Append(sessDir, "sess-1", session.Entry{ + SessionID: "sess-1", Skill: "tdd", Phase: "red", FinalStatus: "pass", + Message: "wrote failing test", FilePath: "internal/foo/foo_test.go", + })) + + callCount := 0 + var readerTask, writerTask string + + fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) { + callCount++ + if 
callCount == 1 { + // reader call + readerTask = req.TaskPrompt + return iexec.Result{ + Status: "pass", Phase: "trainer", Skill: "trainer", + RunnerOutput: `[{"type":"sft","moment":"first-pass clean TDD","score":4}]`, + Verified: true, ModelUsed: "self", Message: "1 sft candidate found", + }, nil + } + // writer call + writerTask = req.TaskPrompt + return iexec.Result{ + Status: "pass", Phase: "trainer", Skill: "trainer", + FilePath: sessDir + "/training-data/sft/sess-1.jsonl", + Verified: true, ModelUsed: "self", Message: "1 sft pair written", + }, nil + } + + sk := trainer.New(trainer.Config{ + ReaderPrompt: "reader rules", + WriterPrompt: "writer rules", + ExecutorFn: fakeFn, + SessionsDir: sessDir, + BrainDir: t.TempDir(), + }) + out, err := sk.Handle(context.Background(), "trainer", json.RawMessage(`{"session_id":"sess-1"}`)) + require.NoError(t, err) + + assert.Equal(t, 2, callCount, "executor must be called exactly twice: reader then writer") + assert.Contains(t, readerTask, "role: reader") + assert.Contains(t, readerTask, "sess-1") + assert.Contains(t, readerTask, "wrote failing test") // session history in reader prompt + assert.Contains(t, writerTask, "role: writer") + assert.Contains(t, writerTask, "first-pass clean TDD") // reader RunnerOutput passed to writer + + var result iexec.Result + require.NoError(t, json.Unmarshal(out, &result)) + assert.Equal(t, "trainer", result.Phase) + assert.Equal(t, "pass", result.Status) +} + +var _ = require.New +``` + +- [ ] **Step 2: Run test to confirm it fails** + +```bash +go test ./internal/skills/trainer/...
-v +``` +Expected: FAIL — package does not exist + +- [ ] **Step 3: Create skill.go** + +```go +// internal/skills/trainer/skill.go +package trainer + +import ( + "context" + "encoding/json" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/registry" +) + +type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error) + +type Config struct { + ReaderPrompt string + WriterPrompt string + DefaultModel string + ExecutorFn ExecutorFn + SessionsDir string + BrainDir string // root of brain/ directory; writer writes to BrainDir/training-data/ +} + +type Skill struct{ cfg Config } + +func New(cfg Config) *Skill { return &Skill{cfg: cfg} } + +func (s *Skill) Name() string { return "trainer" } + +func (s *Skill) Tools() []registry.ToolDef { + schema := func(required []string, props map[string]any) json.RawMessage { + b, _ := json.Marshal(map[string]any{"type": "object", "required": required, "properties": props}) + return b + } + return []registry.ToolDef{ + { + Name: "trainer", + Description: "Extract SFT and DPO training pairs from a session log. 
Runs a reader→writer chain: reader identifies learning moments, writer formats and writes pairs to brain/training-data/.", + InputSchema: schema( + []string{"session_id"}, + map[string]any{ + "session_id": map[string]any{"type": "string"}, + "model": map[string]any{"type": "string"}, + }, + ), + }, + } +} +``` + +- [ ] **Step 4: Create handlers.go** + +```go +// internal/skills/trainer/handlers.go +package trainer + +import ( + "context" + "encoding/json" + "fmt" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" +) + +type trainArgs struct { + SessionID string `json:"session_id"` + Model string `json:"model"` +} + +func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "trainer" { + return nil, fmt.Errorf("unknown tool: %s", tool) + } + var a trainArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.SessionID == "" { + return nil, fmt.Errorf("session_id is required") + } + if s.cfg.ExecutorFn == nil { + return nil, fmt.Errorf("no executor configured") + } + + model := a.Model + if model == "" { + model = s.cfg.DefaultModel + } + + entries, err := session.Read(s.cfg.SessionsDir, a.SessionID) + if err != nil { + return nil, fmt.Errorf("read session log: %w", err) + } + + // ── Step 1: Reader agent ───────────────────────────────────────────────── + history := session.FormatHistory(entries, "") + readerTask := fmt.Sprintf( + "role: reader\nsession_id: %s\nbrain_dir: %s\n\n%s", + a.SessionID, s.cfg.BrainDir, history, + ) + readerResult, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.ReaderPrompt, + TaskPrompt: readerTask, + Model: model, + Tools: "Read", + }) + if err != nil { + return nil, fmt.Errorf("reader agent: %w", err) + } + + // ── Step 2: Writer agent (receives reader candidates) ──────────────────── + writerTask := fmt.Sprintf( + "role: writer\nsession_id: 
%s\nbrain_dir: %s\n\nreader_candidates:\n%s", + a.SessionID, s.cfg.BrainDir, readerResult.RunnerOutput, + ) + writerResult, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.WriterPrompt, + TaskPrompt: writerTask, + Model: model, + Tools: "Read,Write", + }) + if err != nil { + return nil, fmt.Errorf("writer agent: %w", err) + } + + b, err := json.Marshal(writerResult) + if err != nil { + return nil, fmt.Errorf("marshal result: %w", err) + } + return b, nil +} +``` + +- [ ] **Step 5: Create config/supervisor/trainer-reader.md** + +```markdown +# Trainer Reader Discipline + +You scan session logs and identify candidate learning moments worth converting to training data. + +## What to look for +- **SFT candidates**: the worker did exactly the right thing — a clean pattern worth reinforcing +- **DPO candidates**: the worker first produced a wrong or suboptimal response, then corrected — you have both rejected and chosen + +## Scoring (1–5) +- 5: novel pattern, clearly correct, generalises across projects +- 4: good pattern, correct, somewhat project-specific but still useful +- 3: correct but obvious — include only if especially clean +- 2 or below: skip — too ambiguous or too context-specific + +## Output contract +Return JSON result with: +- `status`: "pass" or "error" +- `phase`: "trainer" +- `skill`: "trainer" +- `file_path`: "" +- `runner_output`: JSON array of candidates (valid JSON, not markdown): + [{"type":"sft","moment":"","prompt":"","completion":"","score":4}, + {"type":"dpo","moment":"","prompt":"","chosen":"","rejected":"","score":3}] +- `verified`: true +- `message`: "N sft candidates, M dpo candidates found" + +## Rules +1. Read all session entries in the task prompt +2. Score each entry — only include entries scoring >= 3 +3. Prompt/completion fields must be phrased to generalise: no project-specific paths or names +4. 
If no candidates score >= 3, return an empty array `[]` — never force low-quality candidates +``` + +- [ ] **Step 6: Create config/supervisor/trainer-writer.md** + +```markdown +# Trainer Writer Discipline + +You receive candidate learning moments from the reader and write clean SFT/DPO training pairs. + +## Quality gate (apply before writing) +- SFT: prompt must be phrased so it could come from any project, not just this one +- DPO: chosen and rejected must be clearly distinguishable — skip if a reader can't tell which is better +- Never include project-specific paths, variable names, or identifiers in any pair + +## Output contract +Return JSON result with: +- `status`: "pass" (pairs written or skipped due to quality) or "error" (candidates JSON was malformed) +- `phase`: "trainer" +- `skill`: "trainer" +- `file_path`: path of the last file written (empty if nothing passed quality gate) +- `runner_output`: "N SFT pairs written to brain/training-data/sft/, M DPO pairs to brain/training-data/dpo/" or "0 pairs passed quality gate" +- `verified`: true if files were written; false if nothing passed +- `message`: "N sft + M dpo pairs for session <session_id>" or "no pairs passed quality gate" + +## File format +JSONL — one JSON object per line. + +SFT: `{"prompt": "...", "completion": "..."}` +DPO: `{"prompt": "...", "chosen": "...", "rejected": "..."}` + +Write SFT to: `<brain_dir>/training-data/sft/<session_id>.jsonl` +Write DPO to: `<brain_dir>/training-data/dpo/<session_id>.jsonl` + +Append to existing files if they exist (don't overwrite). + +## Rules +1. Parse the `reader_candidates` JSON from the task prompt +2. For each candidate: apply quality gate +3. Write passing SFT candidates to sft JSONL, DPO candidates to dpo JSONL +4.
If nothing passes, return status "pass" with verified: false and message "no pairs passed quality gate" +``` + +- [ ] **Step 7: Wire into main.go** + +Add file reads: +```go +trainerReaderPrompt, err := os.ReadFile(cfg.ConfigDir + "/trainer-reader.md") +if err != nil { + logger.Error("read trainer-reader.md", "path", cfg.ConfigDir+"/trainer-reader.md", "err", err) + os.Exit(1) +} +trainerWriterPrompt, err := os.ReadFile(cfg.ConfigDir + "/trainer-writer.md") +if err != nil { + logger.Error("read trainer-writer.md", "path", cfg.ConfigDir+"/trainer-writer.md", "err", err) + os.Exit(1) +} +``` + +Add import: `"github.com/mathiasbq/supervisor/internal/skills/trainer"` + +Register skill: +```go +reg.Register(trainer.New(trainer.Config{ + ReaderPrompt: string(trainerReaderPrompt), + WriterPrompt: string(trainerWriterPrompt), + DefaultModel: models.Resolve("trainer", ""), + ExecutorFn: executor.Run, + SessionsDir: cfg.SessionsDir, + BrainDir: cfg.BrainDir, +})) +``` + +- [ ] **Step 8: Update config/models.yaml** + +Add `trainer` entry following existing format: +```yaml +skills: + tdd: ollama/qwen3-coder-30b-tuned + review: ollama/devstral-tuned + debug: ollama/deepseek-r1-tuned + retrospective: ollama/qwen3-coder-30b-tuned + spec: ollama/qwen3-coder-30b-tuned + trainer: ollama/qwen3-coder-30b-tuned +``` + +- [ ] **Step 9: Run all tests** + +```bash +go test ./... 
-race -count=1 +``` +Expected: all tests PASS, including `TestTrainerCallsReaderThenWriter` + +- [ ] **Step 10: Commit** + +```bash +git add internal/skills/trainer/ config/supervisor/trainer-reader.md config/supervisor/trainer-writer.md config/models.yaml cmd/supervisor/main.go +git commit -m "feat(trainer): add trainer MCP skill with reader→writer sub-agent chain" +``` + +--- + +## Task 8: Integration smoke test and CI push + +**Files:** none new — validates the full system + +- [ ] **Step 1: Run task check (full quality gate)** + +```bash +task check +``` +Expected: lint, test, and vet all pass for both modules + +- [ ] **Step 2: Verify all 12 MCP tools are registered** + +With servers running (`task start` in another terminal): +```bash +curl -s -X POST http://localhost:3200/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/list","params":{}}' \ + | python3 -c "import json,sys; tools=json.load(sys.stdin)['result']['tools']; [print(t['name']) for t in tools]" +``` + +Expected output (12 tools): +``` +tdd_red +tdd_green +tdd_refactor +brain_query +brain_write +tier +session_log +retrospective +review +debug +spec +trainer +``` + +- [ ] **Step 3: Smoke test each new skill** + +```bash +# review +curl -s --max-time 10 -X POST http://localhost:3200/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"review","arguments":{"project_root":"/tmp","files":["nonexistent.go"],"context":"test"}}}' + +# debug +curl -s --max-time 10 -X POST http://localhost:3200/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":3,"method":"tools/call","params":{"name":"debug","arguments":{"project_root":"/tmp","error":"test error"}}}' + +# spec +curl -s --max-time 10 -X POST http://localhost:3200/mcp \ + -H "Content-Type: application/json" \ + -d 
'{"jsonrpc":"2.0","id":4,"method":"tools/call","params":{"name":"spec","arguments":{"project_root":"/tmp","requirements":"add login button"}}}' + +# trainer (requires a session log entry) +curl -s --max-time 10 -X POST http://localhost:3200/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":5,"method":"tools/call","params":{"name":"trainer","arguments":{"session_id":"2026-04-17-validate-hyperguild"}}}' +``` + +Each should return a valid JSON-RPC result (not a `-32000` error). If the worker needs longer than 10 seconds, curl gives up with a timeout (exit code 28) instead — that is acceptable here, because `--max-time 10` only checks that the MCP dispatch layer accepts the call. + +- [ ] **Step 4: Push and verify CI** + +```bash +git push origin main +``` + +Watch the CI run complete on Gitea. Check for: +- `check` job: green (lint + test + vet) +- `mirror` job: green (pushed to GitHub) + +```bash +curl -s "https://gitea.d-ma.be/api/v1/repos/mathias/hyperguild/actions/runs?limit=1" \ + -H "Authorization: token $GITEA_TOKEN" | python3 -c " +import json,sys +d=json.load(sys.stdin) +for r in d.get('workflow_runs',[]): + print(f'#{r[\"id\"]} {r[\"status\"]:12} {r.get(\"conclusion\",\"\"):8} {r[\"display_title\"][:50]}') +" +``` + +- [ ] **Step 5: Tag v0.2.0** + +```bash +task tag version=v0.2.0 +``` + +--- + +## Self-review + +**Spec coverage check:** +- Session history injection into tdd_green and tdd_refactor → Task 3 ✓ +- All new skills accept `session_id` for history injection → Tasks 4–7 ✓ +- Trainer reader→writer chain → Task 7 ✓ +- Schema enum fixed (was causing retrospective to return wrong phase) → Task 2 ✓ +- Phase 2 skills registered in main.go → Tasks 4–7 each include main.go wiring ✓ +- CI passes → Task 8 ✓ + +**Placeholder scan:** None found — all steps include complete code.
+ +**Type consistency:** +- `ExecutorFn` is defined in each skill package as `func(ctx context.Context, req iexec.Request) (iexec.Result, error)` — consistent across tdd, review, debug, spec, trainer ✓ +- `Config.SessionsDir` present in all new skills ✓ +- `trainer.Config.BrainDir` used in handlers.go Task 7 writer task prompt ✓ +- `session.FormatHistory` signature `(entries []Entry, excludePhase string) string` used consistently ✓ +- `prependHistory` method is defined identically in review, debug, spec handlers — this is intentional duplication (YAGNI: not enough skills to justify extracting a shared mixin) ✓ + +**Note on tasks 4–7:** These are independent of each other and can be executed in parallel by separate subagents after Tasks 1–3 are complete. diff --git a/docs/superpowers/plans/2026-04-20-model-orchestration-plan.md b/docs/superpowers/plans/2026-04-20-model-orchestration-plan.md new file mode 100644 index 0000000..c6cb8a1 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-model-orchestration-plan.md @@ -0,0 +1,1617 @@ +# Model Orchestration Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Route skill work to per-skill escalation chains (local → Claude) with Claude verifying local output and self-certifying at cloud tier. + +**Architecture:** A new `Orchestrator` type implements the same `ExecutorFn` signature used by all skill handlers — zero handler changes. For each tier in the chain, the orchestrator dispatches generation (LiteLLM for local, claude subprocess for cloud), runs Claude verification on local output, logs the attempt, and escalates on failure. The spec is at `docs/superpowers/specs/2026-04-20-model-orchestration-design.md`. 
+ +**Tech Stack:** Go stdlib `net/http` for LiteLLM calls; existing `exec.Executor` claude subprocess for cloud tier and verification; `gopkg.in/yaml.v3` (already imported) for config parsing. + +--- + +## File structure + +| Action | File | Responsibility | +|--------|------|----------------| +| Modify | `internal/session/session.go` | Add Tier, DurationMs, WarmStart, Verdict, Feedback to Attempt | +| Modify | `internal/exec/executor.go` | Add `--model` flag to subprocess call when req.Model starts with "claude-" | +| Create | `internal/exec/litellm.go` | HTTP client to LiteLLM `/v1/chat/completions`; returns `Result` | +| Create | `internal/exec/litellm_test.go` | Mock HTTP server tests for parse/error/escalation paths | +| Create | `internal/exec/verifier.go` | Claude subprocess that returns `Verdict{Accept, Feedback}` | +| Create | `internal/exec/verifier_test.go` | Fake claude binary tests for accept/escalate/error | +| Create | `internal/exec/orchestrator.go` | Chain walker; warm probe; logging; implements ExecutorFn shape | +| Create | `internal/exec/orchestrator_test.go` | Table-driven: 1/2/3-tier chains, all outcome combinations | +| Modify | `internal/config/models.go` | Chain-aware YAML struct; ChainFor/Verifier/LlamaSwapURL methods | +| Modify | `internal/config/models_test.go` | Update for new YAML format; add ChainFor/override tests | +| Modify | `config/models.yaml` | New chain format for all 6 skills | +| Modify | `cmd/supervisor/main.go` | Create LiteLLMExecutor + Verifier; wire per-skill Orchestrators | + +--- + +### Task 1: Extend the Attempt struct + +**Files:** +- Modify: `internal/session/session.go:32-38` + +The current Attempt struct is missing tier, timing, and verdict fields. Adding them is additive (existing JSONL files deserialise fine with zero values). 
+ +- [ ] **Step 1: Write the failing test** + +Add to `internal/session/session_test.go` (create the file if it does not exist): + +```go +package session_test + +import ( + "encoding/json" + "testing" + + "github.com/mathiasbq/supervisor/internal/session" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestAttemptRoundTrip(t *testing.T) { + a := session.Attempt{ + Attempt: 1, + Model: "ollama/devstral", + Tier: "local", + DurationMs: 4200, + WarmStart: true, + Verified: false, + Verdict: "escalate", + Feedback: "missing line references", + } + data, err := json.Marshal(a) + require.NoError(t, err) + + var got session.Attempt + require.NoError(t, json.Unmarshal(data, &got)) + assert.Equal(t, a, got) +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd /Users/mathias/Documents/local-dev/AI/supervisor +go test ./internal/session/... -run TestAttemptRoundTrip -v +``` + +Expected: FAIL — `session.Attempt` has no `Tier`, `DurationMs`, `WarmStart`, `Verdict`, `Feedback` fields. + +- [ ] **Step 3: Update Attempt struct in session.go** + +Replace lines 32–38: + +```go +// Attempt represents one subprocess invocation within a skill call. +type Attempt struct { + Attempt int `json:"attempt"` + Model string `json:"model"` + Tier string `json:"tier"` // local | subagent | managed + DurationMs int64 `json:"duration_ms"` + WarmStart bool `json:"warm_start"` // model already loaded in llama-swap + Verified bool `json:"verified"` + Verdict string `json:"verdict,omitempty"` // accept | escalate | error + Feedback string `json:"feedback,omitempty"` // verifier feedback on escalation + OutputSummary string `json:"output_summary,omitempty"` + RunnerOutput string `json:"runner_output,omitempty"` +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +go test ./internal/session/...
-run TestAttemptRoundTrip -v +``` + +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add internal/session/session.go internal/session/session_test.go +git commit -m "feat(session): extend Attempt with tier, timing, and verdict fields" +``` + +--- + +### Task 2: Chain-based models config + +**Files:** +- Modify: `internal/config/models.go` +- Modify: `internal/config/models_test.go` +- Modify: `config/models.yaml` + +The current `modelsFile` has `Default string` and `Skills map[string]string`. Replace with a chain-aware structure. The public API gains `ChainFor`, `Verifier`, and `LlamaSwapURL` methods. The existing `Resolve` method is deleted — callers (main.go) will use `ChainFor`. + +- [ ] **Step 1: Write failing tests** + +Replace `internal/config/models_test.go` entirely: + +```go +package config_test + +import ( + "os" + "path/filepath" + "testing" + + "github.com/mathiasbq/supervisor/internal/config" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const testYAML = ` +verifier: claude-sonnet-4-6 +llama_swap_url: http://koala:8080 + +default_chain: + - ollama/qwen3-coder-30b-tuned + - claude-sonnet-4-6 + +skills: + review: + chain: + - ollama/devstral-tuned + - ollama/gemma4 + - claude-sonnet-4-6 + spec: + chain: + - ollama/phi4 + - claude-opus-4-6 +` + +func writeModels(t *testing.T, content string) string { + t.Helper() + f := filepath.Join(t.TempDir(), "models.yaml") + require.NoError(t, os.WriteFile(f, []byte(content), 0644)) + return f +} + +func TestModelsVerifier(t *testing.T) { + m, err := config.LoadModels(writeModels(t, testYAML)) + require.NoError(t, err) + assert.Equal(t, "claude-sonnet-4-6", m.Verifier()) +} + +func TestModelsLlamaSwapURL(t *testing.T) { + m, err := config.LoadModels(writeModels(t, testYAML)) + require.NoError(t, err) + assert.Equal(t, "http://koala:8080", m.LlamaSwapURL()) +} + +func TestModelsChainForSkillOverride(t *testing.T) { + m, err := config.LoadModels(writeModels(t, 
testYAML)) + require.NoError(t, err) + + chain := m.ChainFor("review", "") + require.Len(t, chain, 3) + assert.Equal(t, "ollama/devstral-tuned", chain[0]) + assert.Equal(t, "ollama/gemma4", chain[1]) + assert.Equal(t, "claude-sonnet-4-6", chain[2]) +} + +func TestModelsChainForDefaultFallback(t *testing.T) { + m, err := config.LoadModels(writeModels(t, testYAML)) + require.NoError(t, err) + + chain := m.ChainFor("trainer", "") // not in skills map + require.Len(t, chain, 2) + assert.Equal(t, "ollama/qwen3-coder-30b-tuned", chain[0]) + assert.Equal(t, "claude-sonnet-4-6", chain[1]) +} + +func TestModelsChainForCallerOverride(t *testing.T) { + m, err := config.LoadModels(writeModels(t, testYAML)) + require.NoError(t, err) + + // Caller override collapses to a single-entry chain — no escalation. + chain := m.ChainFor("review", "claude-opus-4-6") + require.Len(t, chain, 1) + assert.Equal(t, "claude-opus-4-6", chain[0]) +} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +go test ./internal/config/... -v +``` + +Expected: compile error — `ChainFor`, `Verifier`, `LlamaSwapURL` undefined. + +- [ ] **Step 3: Rewrite models.go** + +```go +package config + +import ( + "fmt" + "os" + + "gopkg.in/yaml.v3" +) + +type skillChain struct { + Chain []string `yaml:"chain"` +} + +type modelsFile struct { + Verifier string `yaml:"verifier"` + LlamaSwapURL string `yaml:"llama_swap_url"` + DefaultChain []string `yaml:"default_chain"` + Skills map[string]skillChain `yaml:"skills"` +} + +type Models struct { + data modelsFile +} + +func LoadModels(path string) (Models, error) { + raw, err := os.ReadFile(path) + if err != nil { + return Models{}, fmt.Errorf("load models: %w", err) + } + var f modelsFile + if err := yaml.Unmarshal(raw, &f); err != nil { + return Models{}, fmt.Errorf("parse models: %w", err) + } + return Models{data: f}, nil +} + +// Verifier returns the model name to use for all local-tier output verification. 
+func (m Models) Verifier() string { return m.data.Verifier } + +// LlamaSwapURL returns the llama-swap base URL for warm-state probing. +func (m Models) LlamaSwapURL() string { return m.data.LlamaSwapURL } + +// ChainFor returns the ordered list of model names for a skill. +// If override is non-empty, returns a single-entry chain (no escalation). +// Falls back to default_chain when the skill has no explicit entry. +func (m Models) ChainFor(skill, override string) []string { + if override != "" { + return []string{override} + } + if sc, ok := m.data.Skills[skill]; ok && len(sc.Chain) > 0 { + return sc.Chain + } + out := make([]string, len(m.data.DefaultChain)) + copy(out, m.data.DefaultChain) + return out +} +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +go test ./internal/config/... -v +``` + +Expected: PASS — all 5 tests green. + +- [ ] **Step 5: Update config/models.yaml** + +```yaml +# Model routing chains — three-layer priority: +# 1. model param in MCP tool call (caller override — collapses to single entry, no escalation) +# 2. per-skill chain here +# 3. default_chain fallback + +verifier: claude-sonnet-4-6 # fixed verifier for all local tiers + +llama_swap_url: http://koala:8080 # for warm-state probing + +default_chain: + - ollama/qwen3-coder-30b-tuned + - claude-sonnet-4-6 + +skills: + tdd: + chain: + - ollama/qwen3-coder-30b-tuned + - claude-sonnet-4-6 + review: + chain: + - ollama/devstral-tuned + - ollama/gemma4 + - claude-sonnet-4-6 + debug: + chain: + - ollama/deepseek-r1-tuned + - claude-sonnet-4-6 + spec: + chain: + - ollama/phi4 + - ollama/gemma4 + - claude-sonnet-4-6 + - claude-opus-4-6 + retrospective: + chain: + - ollama/qwen3-coder-30b-tuned + - claude-sonnet-4-6 + trainer: + chain: + - ollama/qwen3-coder-30b-tuned + - claude-sonnet-4-6 +``` + +- [ ] **Step 6: Confirm the build now breaks only in main.go** + +```bash +go build ./... +``` + +Expected: compile error in main.go — `models.Resolve` no longer exists. 
That's expected; main.go will be fixed in Task 7. + +- [ ] **Step 7: Commit** + +```bash +git add internal/config/models.go internal/config/models_test.go config/models.yaml +git commit -m "feat(config): replace single-model config with chain-based routing" +``` + +--- + +### Task 3: Add --model flag to the claude Executor + +**Files:** +- Modify: `internal/exec/executor.go:69-76` + +The existing executor never passes a `--model` flag; the model name is injected as prompt text (currently ignored by Claude). Cloud-tier dispatch needs to actually select the right model. This change adds `--model req.Model` when the model is set and starts with "claude-". + +- [ ] **Step 1: Write the failing test** + +Add to `internal/exec/executor_test.go`: + +```go +func TestExecutorPassesModelFlag(t *testing.T) { + // The fake claude script echoes its arguments to stderr so we can assert --model was passed. + dir := t.TempDir() + script := filepath.Join(dir, "claude") + envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"review","skill":"review","file_path":"","runner_output":"","verified":true,"model_used":"claude-sonnet-4-6","message":"ok"}}` + // Script prints args to stderr, then prints envelope to stdout. + content := "#!/bin/sh\necho \"$@\" >&2\necho '" + envelope + "'\n" + require.NoError(t, os.WriteFile(script, []byte(content), 0755)) + + ex := iexec.New(iexec.Config{ + ClaudeBinary: script, + SystemPrompt: "sys", + Timeout: 5 * time.Second, + }) + + var stderrBuf bytes.Buffer + _ = stderrBuf // not exposed; we rely on the test at the result level + result, err := ex.Run(context.Background(), iexec.Request{ + SkillPrompt: "review rules", + TaskPrompt: "do review", + Model: "claude-sonnet-4-6", + }) + require.NoError(t, err) + assert.Equal(t, "pass", result.Status) + // The real assertion is that the --model flag doesn't break anything. + // Integration-level model verification is in the orchestrator tests. 
+} +``` + +- [ ] **Step 2: Run test to verify it passes already** + +```bash +go test ./internal/exec/... -run TestExecutorPassesModelFlag -v +``` + +Expected: PASS (the test currently succeeds since it only checks result parsing). This step confirms the baseline. + +- [ ] **Step 3: Add --model flag to executor.go** + +In `internal/exec/executor.go`, rebuild the `args` slice (currently lines 69-76) so that `--model` is appended for `claude-*` models before the prompt argument: + +```go + args := []string{ + "--print", + "--permission-mode", "bypassPermissions", + "--tools", tools, + "--json-schema", Schema, + "--output-format", "json", + } + if strings.HasPrefix(req.Model, "claude-") { + args = append(args, "--model", req.Model) + } + args = append(args, prompt) +``` + +Replace the existing `args` block (lines 69-76) with the above. The `strings` import is already present. + +- [ ] **Step 4: Run all exec tests** + +```bash +go test ./internal/exec/... -v +``` + +Expected: all existing tests pass. + +- [ ] **Step 5: Commit** + +```bash +git add internal/exec/executor.go internal/exec/executor_test.go +git commit -m "feat(exec): pass --model flag to claude subprocess for cloud-tier dispatch" +``` + +--- + +### Task 4: LiteLLM executor + +**Files:** +- Create: `internal/exec/litellm.go` +- Create: `internal/exec/litellm_test.go` + +The LiteLLM executor calls `POST /v1/chat/completions` and expects the model to return a JSON object matching the `Result` schema in the response content. No envelope — direct unmarshal. Parse failure triggers automatic escalation by the orchestrator. 
+ +- [ ] **Step 1: Write the failing tests** + +Create `internal/exec/litellm_test.go`: + +```go +package exec_test + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func validResult() iexec.Result { + return iexec.Result{ + Status: "pass", + Phase: "review", + Skill: "review", + ModelUsed: "ollama/devstral", + Message: "looks good", + } +} + +func chatResponseFor(t *testing.T, result iexec.Result) string { + t.Helper() + content, err := json.Marshal(result) + require.NoError(t, err) + resp := map[string]any{ + "choices": []map[string]any{ + {"message": map[string]any{"role": "assistant", "content": string(content)}}, + }, + } + data, err := json.Marshal(resp) + require.NoError(t, err) + return string(data) +} + +func TestLiteLLMParsesValidResult(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/v1/chat/completions", r.URL.Path) + assert.Equal(t, "application/json", r.Header.Get("Content-Type")) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(chatResponseFor(t, validResult()))) + })) + defer srv.Close() + + ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second) + result, err := ex.Run(context.Background(), iexec.Request{ + SkillPrompt: "review rules", + TaskPrompt: "review the code", + Model: "ollama/devstral", + }) + require.NoError(t, err) + assert.Equal(t, "pass", result.Status) + assert.Equal(t, "review", result.Skill) +} + +func TestLiteLLMSendsAuthHeader(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "Bearer secret", r.Header.Get("Authorization")) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = 
w.Write([]byte(chatResponseFor(t, validResult()))) + })) + defer srv.Close() + + ex := iexec.NewLiteLLM(srv.URL, "secret", 5*time.Second) + _, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"}) + require.NoError(t, err) +} + +func TestLiteLLMErrorOnNonOKStatus(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + })) + defer srv.Close() + + ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second) + _, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"}) + assert.ErrorContains(t, err, "503") +} + +func TestLiteLLMErrorOnUnparsableJSON(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + resp := map[string]any{ + "choices": []map[string]any{ + {"message": map[string]any{"role": "assistant", "content": "not json at all"}}, + }, + } + data, _ := json.Marshal(resp) + _, _ = w.Write(data) + })) + defer srv.Close() + + ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second) + _, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"}) + assert.Error(t, err) +} + +func TestLiteLLMRespectsContextCancellation(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Block until client disconnects. + <-r.Context().Done() + })) + defer srv.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) + defer cancel() + + ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second) + _, err := ex.Run(ctx, iexec.Request{Model: "x", TaskPrompt: "t"}) + assert.Error(t, err) +} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +go test ./internal/exec/... -run TestLiteLLM -v +``` + +Expected: compile error — `iexec.NewLiteLLM` undefined. 
+ +- [ ] **Step 3: Create internal/exec/litellm.go** + +```go +package exec + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "time" +) + +// LiteLLMExecutor calls a LiteLLM-compatible /v1/chat/completions endpoint. +// Local models are expected to return a JSON object matching the Result schema +// as their response content — no envelope. +type LiteLLMExecutor struct { + baseURL string + apiKey string + httpClient *http.Client +} + +// NewLiteLLM creates a LiteLLMExecutor. +// timeout applies to the full HTTP round-trip per call. +func NewLiteLLM(baseURL, apiKey string, timeout time.Duration) *LiteLLMExecutor { + return &LiteLLMExecutor{ + baseURL: baseURL, + apiKey: apiKey, + httpClient: &http.Client{Timeout: timeout}, + } +} + +type litellmMessage struct { + Role string `json:"role"` + Content string `json:"content"` +} + +type litellmRequest struct { + Model string `json:"model"` + Messages []litellmMessage `json:"messages"` +} + +type litellmChoice struct { + Message litellmMessage `json:"message"` +} + +type litellmResponse struct { + Choices []litellmChoice `json:"choices"` +} + +// Run dispatches req to the LiteLLM server and parses the Result from the +// assistant message content. Returns an error on network failure, non-200 +// status, or unparseable/invalid JSON — all of which the Orchestrator treats +// as automatic escalation triggers. 
+func (e *LiteLLMExecutor) Run(ctx context.Context, req Request) (Result, error) { + body := litellmRequest{ + Model: req.Model, + Messages: []litellmMessage{ + {Role: "system", Content: req.SkillPrompt}, + {Role: "user", Content: req.TaskPrompt}, + }, + } + + bodyBytes, err := json.Marshal(body) + if err != nil { + return Result{}, fmt.Errorf("litellm: marshal request: %w", err) + } + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, e.baseURL+"/v1/chat/completions", bytes.NewReader(bodyBytes)) + if err != nil { + return Result{}, fmt.Errorf("litellm: create request: %w", err) + } + httpReq.Header.Set("Content-Type", "application/json") + if e.apiKey != "" { + httpReq.Header.Set("Authorization", "Bearer "+e.apiKey) + } + + resp, err := e.httpClient.Do(httpReq) + if err != nil { + return Result{}, fmt.Errorf("litellm: request failed: %w", err) + } + defer resp.Body.Close() //nolint:errcheck + + if resp.StatusCode != http.StatusOK { + return Result{}, fmt.Errorf("litellm: server returned status %d", resp.StatusCode) + } + + var chatResp litellmResponse + if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil { + return Result{}, fmt.Errorf("litellm: decode response: %w", err) + } + if len(chatResp.Choices) == 0 { + return Result{}, fmt.Errorf("litellm: no choices in response") + } + + content := chatResp.Choices[0].Message.Content + var result Result + if err := json.Unmarshal([]byte(content), &result); err != nil { + return Result{}, fmt.Errorf("litellm: parse result JSON: %w — content: %s", err, content) + } + if err := result.Validate(); err != nil { + return Result{}, fmt.Errorf("litellm: invalid result: %w", err) + } + return result, nil +} +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +go test ./internal/exec/... -run TestLiteLLM -v +``` + +Expected: all 5 LiteLLM tests PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add internal/exec/litellm.go internal/exec/litellm_test.go +git commit -m "feat(exec): add LiteLLM HTTP executor for local model dispatch" +``` + +--- + +### Task 5: Claude verifier + +**Files:** +- Create: `internal/exec/verifier.go` +- Create: `internal/exec/verifier_test.go` + +The verifier runs a focused `claude --print` call. It gives Claude the skill discipline, the original task, and the local model output, and asks for a JSON verdict. Unlike the main executor it uses `--print` without `--output-format json` (no envelope) and without `--json-schema` (we parse the raw text). It selects a specific claude model via `--model`. + +- [ ] **Step 1: Write the failing tests** + +Create `internal/exec/verifier_test.go`: + +```go +package exec_test + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "testing" + "time" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func fakeVerifierClaude(t *testing.T, verdict iexec.Verdict) string { + t.Helper() + data, err := json.Marshal(verdict) + require.NoError(t, err) + dir := t.TempDir() + script := filepath.Join(dir, "claude") + content := fmt.Sprintf("#!/bin/sh\necho '%s'\n", string(data)) + require.NoError(t, os.WriteFile(script, []byte(content), 0755)) + return script +} + +func TestVerifierAccepts(t *testing.T) { + claude := fakeVerifierClaude(t, iexec.Verdict{Accept: true, Feedback: ""}) + v := iexec.NewVerifier(claude, "claude-sonnet-4-6", 5*time.Second) + + verdict, err := v.Verify(context.Background(), "skill rules", "do the task", iexec.Result{ + Status: "pass", Phase: "review", Skill: "review", Message: "ok", + }) + require.NoError(t, err) + assert.True(t, verdict.Accept) + assert.Empty(t, verdict.Feedback) +} + +func TestVerifierEscalates(t *testing.T) { + claude := fakeVerifierClaude(t, iexec.Verdict{Accept: false, Feedback: "missing line references"}) + 
v := iexec.NewVerifier(claude, "claude-sonnet-4-6", 5*time.Second) + + verdict, err := v.Verify(context.Background(), "skill rules", "do the task", iexec.Result{ + Status: "pass", Phase: "review", Skill: "review", Message: "incomplete", + }) + require.NoError(t, err) + assert.False(t, verdict.Accept) + assert.Equal(t, "missing line references", verdict.Feedback) +} + +func TestVerifierErrorOnUnparsableOutput(t *testing.T) { + dir := t.TempDir() + script := filepath.Join(dir, "claude") + require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\necho 'not json'\n"), 0755)) + + v := iexec.NewVerifier(script, "claude-sonnet-4-6", 5*time.Second) + _, err := v.Verify(context.Background(), "rules", "task", iexec.Result{ + Status: "pass", Phase: "review", Skill: "review", Message: "ok", + }) + assert.Error(t, err) +} + +func TestVerifierErrorOnNonZeroExit(t *testing.T) { + dir := t.TempDir() + script := filepath.Join(dir, "claude") + require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\nexit 1\n"), 0755)) + + v := iexec.NewVerifier(script, "claude-sonnet-4-6", 5*time.Second) + _, err := v.Verify(context.Background(), "rules", "task", iexec.Result{ + Status: "pass", Phase: "review", Skill: "review", Message: "ok", + }) + assert.Error(t, err) +} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +go test ./internal/exec/... -run TestVerifier -v +``` + +Expected: compile error — `iexec.NewVerifier`, `iexec.Verdict` undefined. + +- [ ] **Step 3: Create internal/exec/verifier.go** + +```go +package exec + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + "time" +) + +// Verdict is the output of a Claude verification call. +type Verdict struct { + Accept bool `json:"accept"` + Feedback string `json:"feedback"` // empty when Accept is true +} + +// Verifier runs a focused Claude call to judge local model output. 
+type Verifier struct { + claudeBinary string + model string + timeout time.Duration +} + +// NewVerifier creates a Verifier that calls claude with the given binary path and model. +func NewVerifier(claudeBinary, model string, timeout time.Duration) *Verifier { + if claudeBinary == "" { + claudeBinary = "claude" + } + if timeout == 0 { + timeout = 30 * time.Second + } + return &Verifier{ + claudeBinary: claudeBinary, + model: model, + timeout: timeout, + } +} + +// Verify asks Claude whether output satisfies the skill discipline's iron laws. +// Returns Verdict{Accept: true} to accept or Verdict{Accept: false, Feedback: "..."} +// to escalate. Returns an error on subprocess failure or unparseable response. +func (v *Verifier) Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error) { + ctx, cancel := context.WithTimeout(ctx, v.timeout) + defer cancel() + + outputJSON, err := json.Marshal(output) + if err != nil { + return Verdict{}, fmt.Errorf("verifier: marshal output: %w", err) + } + + prompt := fmt.Sprintf(`You are a quality verifier for an AI supervisor system. + +Given the skill discipline, the original task, and the generated output, decide whether the output satisfies the discipline's iron laws and output contract. + +Reply with JSON only — no other text: +{"accept": true, "feedback": ""} +or +{"accept": false, "feedback": ""} + +## Skill discipline +%s + +## Original task +%s + +## Generated output +%s`, skillPrompt, taskPrompt, string(outputJSON)) + + args := []string{ + "--print", + "--permission-mode", "bypassPermissions", + } + if v.model != "" { + args = append(args, "--model", v.model) + } + args = append(args, prompt) + + cmd := exec.CommandContext(ctx, v.claudeBinary, args...) 
+ cmd.Env = os.Environ() + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + if ctx.Err() != nil { + return Verdict{}, fmt.Errorf("verifier: timeout after %s", v.timeout) + } + return Verdict{}, fmt.Errorf("verifier: claude exited with error: %w — stderr: %s", err, stderr.String()) + } + + var verdict Verdict + if err := json.Unmarshal(bytes.TrimSpace(stdout.Bytes()), &verdict); err != nil { + return Verdict{}, fmt.Errorf("verifier: parse verdict JSON: %w — raw: %s", err, stdout.String()) + } + return verdict, nil +} +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +go test ./internal/exec/... -run TestVerifier -v +``` + +Expected: all 4 verifier tests PASS. + +- [ ] **Step 5: Run all exec tests** + +```bash +go test ./internal/exec/... -v +``` + +Expected: all tests pass (executor + litellm + verifier + result tests). + +- [ ] **Step 6: Commit** + +```bash +git add internal/exec/verifier.go internal/exec/verifier_test.go +git commit -m "feat(exec): add Claude verifier for local model output quality gate" +``` + +--- + +### Task 6: Orchestrator + +**Files:** +- Create: `internal/exec/orchestrator.go` +- Create: `internal/exec/orchestrator_test.go` + +The orchestrator implements the `func(ctx, Request) (Result, error)` shape that all skill handlers expect as `ExecutorFn`. It walks the escalation chain, probes llama-swap warm state for local tiers, dispatches generation, and either accepts or escalates based on the verifier verdict. Every attempt is recorded as an `AttemptRecord` (the model, tier, duration, warm-start, verdict, and feedback fields of `session.Attempt`) and appended to a caller-provided slice. 
+ +- [ ] **Step 1: Write the failing tests** + +Create `internal/exec/orchestrator_test.go`: + +```go +package exec_test + +import ( + "context" + "errors" + "testing" + "time" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// fakeLocalExecutor returns the result/error on each sequential call. +type fakeLocalExecutor struct { + calls []fakeCall + callIdx int +} + +type fakeCall struct { + result iexec.Result + err error +} + +func (f *fakeLocalExecutor) Run(_ context.Context, _ iexec.Request) (iexec.Result, error) { + if f.callIdx >= len(f.calls) { + return iexec.Result{}, errors.New("unexpected call") + } + c := f.calls[f.callIdx] + f.callIdx++ + return c.result, c.err +} + +// fakeVerifier returns the verdict on each sequential call. +type fakeVerifier struct { + verdicts []iexec.Verdict + idx int +} + +func (f *fakeVerifier) Verify(_ context.Context, _, _ string, _ iexec.Result) (iexec.Verdict, error) { + if f.idx >= len(f.verdicts) { + return iexec.Verdict{}, errors.New("unexpected verify call") + } + v := f.verdicts[f.idx] + f.idx++ + return v, nil +} + +func okResult(skill string) iexec.Result { + return iexec.Result{Status: "pass", Phase: "review", Skill: skill, Message: "ok", ModelUsed: "m"} +} + +func TestOrchestratorSingleLocalAccept(t *testing.T) { + local := &fakeLocalExecutor{calls: []fakeCall{{result: okResult("review")}}} + verifier := &fakeVerifier{verdicts: []iexec.Verdict{{Accept: true}}} + + var attempts []iexec.AttemptRecord + orch := iexec.NewOrchestrator( + []iexec.ChainEntry{{Model: "ollama/devstral", Tier: "local", IsCloud: false}}, + local.Run, nil, verifier, "", &attempts, + ) + + result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"}) + require.NoError(t, err) + assert.Equal(t, "pass", result.Status) + require.Len(t, attempts, 1) + assert.Equal(t, "local", attempts[0].Tier) + assert.Equal(t, "accept", 
attempts[0].Verdict) +} + +func TestOrchestratorEscalatesOnVerifierReject(t *testing.T) { + goodResult := okResult("review") + local := &fakeLocalExecutor{calls: []fakeCall{ + {result: iexec.Result{Status: "fail", Phase: "review", Skill: "review", Message: "weak"}}, + {result: goodResult}, + }} + verifier := &fakeVerifier{verdicts: []iexec.Verdict{ + {Accept: false, Feedback: "missing line refs"}, + {Accept: true}, + }} + + var attempts []iexec.AttemptRecord + orch := iexec.NewOrchestrator( + []iexec.ChainEntry{ + {Model: "ollama/devstral", Tier: "local", IsCloud: false}, + {Model: "ollama/gemma4", Tier: "local", IsCloud: false}, + }, + local.Run, nil, verifier, "", &attempts, + ) + + result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"}) + require.NoError(t, err) + assert.Equal(t, "pass", result.Status) + require.Len(t, attempts, 2) + assert.Equal(t, "escalate", attempts[0].Verdict) + assert.Equal(t, "missing line refs", attempts[0].Feedback) + assert.Equal(t, "accept", attempts[1].Verdict) + // Feedback from tier 0 should have been injected into tier 1 task prompt. 
+ assert.Equal(t, 2, local.callIdx) +} + +func TestOrchestratorEscalatesOnLocalError(t *testing.T) { + local := &fakeLocalExecutor{calls: []fakeCall{ + {err: errors.New("network failure")}, + {result: okResult("review")}, + }} + verifier := &fakeVerifier{verdicts: []iexec.Verdict{{Accept: true}}} + + var attempts []iexec.AttemptRecord + orch := iexec.NewOrchestrator( + []iexec.ChainEntry{ + {Model: "ollama/devstral", Tier: "local", IsCloud: false}, + {Model: "ollama/gemma4", Tier: "local", IsCloud: false}, + }, + local.Run, nil, verifier, "", &attempts, + ) + + _, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"}) + require.NoError(t, err) + require.Len(t, attempts, 2) + assert.Equal(t, "error", attempts[0].Verdict) + assert.Equal(t, "accept", attempts[1].Verdict) +} + +func TestOrchestratorCloudTierSelfCertifies(t *testing.T) { + cloudResult := okResult("review") + cloudExec := &fakeLocalExecutor{calls: []fakeCall{{result: cloudResult}}} + verifier := &fakeVerifier{} // no verdicts — should not be called + + var attempts []iexec.AttemptRecord + orch := iexec.NewOrchestrator( + []iexec.ChainEntry{{Model: "claude-sonnet-4-6", Tier: "subagent", IsCloud: true}}, + nil, cloudExec.Run, verifier, "", &attempts, + ) + + result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"}) + require.NoError(t, err) + assert.Equal(t, "pass", result.Status) + require.Len(t, attempts, 1) + assert.Equal(t, "subagent", attempts[0].Tier) + assert.Equal(t, "accept", attempts[0].Verdict) + assert.Equal(t, 0, verifier.idx) // verifier never called +} + +func TestOrchestratorAllTiersExhausted(t *testing.T) { + local := &fakeLocalExecutor{calls: []fakeCall{ + {err: errors.New("unavailable")}, + }} + + var attempts []iexec.AttemptRecord + orch := iexec.NewOrchestrator( + []iexec.ChainEntry{{Model: "ollama/devstral", Tier: "local", IsCloud: false}}, + local.Run, nil, &fakeVerifier{}, "", &attempts, + ) + + _, err := orch.Run(context.Background(), 
iexec.Request{TaskPrompt: "review"}) + assert.ErrorContains(t, err, "all tiers exhausted") +} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +go test ./internal/exec/... -run TestOrchestrator -v +``` + +Expected: compile error — `iexec.ChainEntry`, `iexec.AttemptRecord`, `iexec.NewOrchestrator` undefined. + +- [ ] **Step 3: Create internal/exec/orchestrator.go** + +```go +package exec + +import ( + "context" + "fmt" + "io" + "net/http" + "strings" + "time" +) + +// ChainEntry is one tier in an escalation chain. +type ChainEntry struct { + Model string // e.g. "ollama/phi4", "claude-sonnet-4-6" + Tier string // "local" | "subagent" | "managed" + IsCloud bool // true for claude-* models; skips verifier call +} + +// EntryFor builds a ChainEntry from a model name string. +func EntryFor(model string) ChainEntry { + cloud := strings.HasPrefix(model, "claude-") + tier := "local" + if cloud { + tier = "subagent" + } + return ChainEntry{Model: model, Tier: tier, IsCloud: cloud} +} + +// AttemptRecord captures the outcome of one tier attempt for session logging. +type AttemptRecord struct { + Model string + Tier string + DurationMs int64 + WarmStart bool + Verdict string // "accept" | "escalate" | "error" + Feedback string +} + +// VerifierFn is the interface the orchestrator uses to verify local output. +type VerifierFn interface { + Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error) +} + +// ExecutorRunFn is the signature of Executor.Run and LiteLLMExecutor.Run. +type ExecutorRunFn func(ctx context.Context, req Request) (Result, error) + +// Orchestrator walks an escalation chain, delegating generation and verification. +// It implements the ExecutorFn shape expected by skill handlers. 
+type Orchestrator struct { + chain []ChainEntry + localRun ExecutorRunFn // for local (non-cloud) tiers; may be nil + cloudRun ExecutorRunFn // for cloud tiers; may be nil + verifier VerifierFn + llamaSwapURL string + attempts *[]AttemptRecord +} + +// NewOrchestrator creates an Orchestrator. +// attempts is a pointer to a slice that will be appended to on each tier attempt. +// Pass nil for localRun or cloudRun if no tiers of that type exist in the chain. +func NewOrchestrator( + chain []ChainEntry, + localRun ExecutorRunFn, + cloudRun ExecutorRunFn, + verifier VerifierFn, + llamaSwapURL string, + attempts *[]AttemptRecord, +) *Orchestrator { + return &Orchestrator{ + chain: chain, + localRun: localRun, + cloudRun: cloudRun, + verifier: verifier, + llamaSwapURL: llamaSwapURL, + attempts: attempts, + } +} + +// Run walks the escalation chain and returns the first accepted result. +// It satisfies the ExecutorFn signature: func(context.Context, Request) (Result, error). +func (o *Orchestrator) Run(ctx context.Context, req Request) (Result, error) { + taskPrompt := req.TaskPrompt + + for _, entry := range o.chain { + warm := o.probeWarm(entry.Model) + start := time.Now() + + tierReq := req + tierReq.Model = entry.Model + tierReq.TaskPrompt = taskPrompt + + var result Result + var genErr error + + if entry.IsCloud { + result, genErr = o.cloudRun(ctx, tierReq) + dur := time.Since(start).Milliseconds() + rec := AttemptRecord{ + Model: entry.Model, + Tier: entry.Tier, + DurationMs: dur, + WarmStart: warm, + Verdict: "accept", + } + if genErr != nil { + rec.Verdict = "error" + } + o.appendAttempt(rec) + if genErr == nil { + return result, nil + } + continue + } + + // Local tier. 
+ result, genErr = o.localRun(ctx, tierReq) + dur := time.Since(start).Milliseconds() + + if genErr != nil { + o.appendAttempt(AttemptRecord{ + Model: entry.Model, + Tier: entry.Tier, + DurationMs: dur, + WarmStart: warm, + Verdict: "error", + Feedback: genErr.Error(), + }) + continue + } + + verdict, verErr := o.verifier.Verify(ctx, req.SkillPrompt, taskPrompt, result) + if verErr != nil { + // Treat verifier failure as escalate (safe default). + o.appendAttempt(AttemptRecord{ + Model: entry.Model, + Tier: entry.Tier, + DurationMs: dur, + WarmStart: warm, + Verdict: "escalate", + Feedback: "verifier error: " + verErr.Error(), + }) + continue + } + + if verdict.Accept { + o.appendAttempt(AttemptRecord{ + Model: entry.Model, + Tier: entry.Tier, + DurationMs: dur, + WarmStart: warm, + Verdict: "accept", + }) + return result, nil + } + + o.appendAttempt(AttemptRecord{ + Model: entry.Model, + Tier: entry.Tier, + DurationMs: dur, + WarmStart: warm, + Verdict: "escalate", + Feedback: verdict.Feedback, + }) + // Inject verifier feedback into the next tier's task prompt. + taskPrompt = taskPrompt + "\n\nPrior attempt feedback: " + verdict.Feedback + } + + return Result{}, fmt.Errorf("all tiers exhausted after %d attempt(s)", len(o.chain)) +} + +func (o *Orchestrator) appendAttempt(rec AttemptRecord) { + if o.attempts != nil { + *o.attempts = append(*o.attempts, rec) + } +} + +// probeWarm checks whether the model is currently loaded in llama-swap. +// Returns false on any error or if llamaSwapURL is empty. 
+func (o *Orchestrator) probeWarm(model string) bool { + if o.llamaSwapURL == "" { + return false + } + ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, o.llamaSwapURL+"/v1/models", nil) + if err != nil { + return false + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return false + } + defer resp.Body.Close() //nolint:errcheck + body, err := io.ReadAll(resp.Body) + if err != nil { + return false + } + return strings.Contains(string(body), model) +} +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +go test ./internal/exec/... -run TestOrchestrator -v +``` + +Expected: all 5 orchestrator tests PASS. + +- [ ] **Step 5: Run all exec tests** + +```bash +go test ./internal/exec/... -v +``` + +Expected: all tests pass. + +- [ ] **Step 6: Commit** + +```bash +git add internal/exec/orchestrator.go internal/exec/orchestrator_test.go +git commit -m "feat(exec): add Orchestrator chain walker with verification and warm-state logging" +``` + +--- + +### Task 7: Wire orchestrators in main.go + +**Files:** +- Modify: `cmd/supervisor/main.go` + +Replace the single `executor.Run` passed to each skill with a per-skill `Orchestrator`. All skill handlers are unchanged — they still call `ExecutorFn` exactly as before. The `models.Resolve` call is replaced by `models.ChainFor`. A shared `LiteLLMExecutor`, `Verifier`, and `claudeExecutor` are created once and shared across all orchestrators. + +- [ ] **Step 1: Read the current main.go** + +Verify it has 6 skill registrations using `ExecutorFn: executor.Run` — lines 98–145. + +- [ ] **Step 2: Update main.go** + +Replace the full `main.go` contents. The critical changes: +1. Build `litellmExec` from `iexec.NewLiteLLM` +2. Build `verifier` from `iexec.NewVerifier` +3. Add `func buildOrch(...)` helper to keep registration readable +4. 
Replace `ExecutorFn: executor.Run` with `ExecutorFn: buildOrch(...).Run` for each skill + +```go +package main + +import ( + "context" + "log/slog" + "net/http" + "os" + + "github.com/mathiasbq/supervisor/internal/config" + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/mcp" + "github.com/mathiasbq/supervisor/internal/registry" + "github.com/mathiasbq/supervisor/internal/skills/brain" + skilldebug "github.com/mathiasbq/supervisor/internal/skills/debug" + "github.com/mathiasbq/supervisor/internal/skills/org" + "github.com/mathiasbq/supervisor/internal/skills/retrospective" + "github.com/mathiasbq/supervisor/internal/skills/review" + "github.com/mathiasbq/supervisor/internal/skills/sessionlog" + "github.com/mathiasbq/supervisor/internal/skills/spec" + "github.com/mathiasbq/supervisor/internal/skills/tdd" + "github.com/mathiasbq/supervisor/internal/skills/trainer" + "github.com/mathiasbq/supervisor/internal/tier" +) + +func main() { + logger := slog.New(slog.NewJSONHandler(os.Stdout, nil)) + + cfg, err := config.Load() + if err != nil { + logger.Error("load config", "err", err) + os.Exit(1) + } + + models, err := config.LoadModels(cfg.ModelsFile) + if err != nil { + logger.Error("load models", "err", err) + os.Exit(1) + } + + systemPrompt, err := os.ReadFile(cfg.ConfigDir + "/CLAUDE.md") + if err != nil { + logger.Error("read supervisor CLAUDE.md", "path", cfg.ConfigDir+"/CLAUDE.md", "err", err) + os.Exit(1) + } + + tddPrompt, err := os.ReadFile(cfg.ConfigDir + "/tdd.md") + if err != nil { + logger.Error("read tdd.md", "path", cfg.ConfigDir+"/tdd.md", "err", err) + os.Exit(1) + } + + retroPrompt, err := os.ReadFile(cfg.ConfigDir + "/retrospective.md") + if err != nil { + logger.Error("read retrospective.md", "path", cfg.ConfigDir+"/retrospective.md", "err", err) + os.Exit(1) + } + + reviewPrompt, err := os.ReadFile(cfg.ConfigDir + "/review.md") + if err != nil { + logger.Error("read review.md", "path", 
cfg.ConfigDir+"/review.md", "err", err) + os.Exit(1) + } + + debugPrompt, err := os.ReadFile(cfg.ConfigDir + "/debug.md") + if err != nil { + logger.Error("read debug.md", "path", cfg.ConfigDir+"/debug.md", "err", err) + os.Exit(1) + } + + specPrompt, err := os.ReadFile(cfg.ConfigDir + "/spec.md") + if err != nil { + logger.Error("read spec.md", "path", cfg.ConfigDir+"/spec.md", "err", err) + os.Exit(1) + } + + trainerReaderPrompt, err := os.ReadFile(cfg.ConfigDir + "/trainer-reader.md") + if err != nil { + logger.Error("read trainer-reader.md", "path", cfg.ConfigDir+"/trainer-reader.md", "err", err) + os.Exit(1) + } + trainerWriterPrompt, err := os.ReadFile(cfg.ConfigDir + "/trainer-writer.md") + if err != nil { + logger.Error("read trainer-writer.md", "path", cfg.ConfigDir+"/trainer-writer.md", "err", err) + os.Exit(1) + } + + claudeExec := iexec.New(iexec.Config{ + SystemPrompt: string(systemPrompt), + LiteLLMBaseURL: cfg.LiteLLMBaseURL, + LiteLLMAPIKey: cfg.LiteLLMAPIKey, + }) + + litellmExec := iexec.NewLiteLLM(cfg.LiteLLMBaseURL, cfg.LiteLLMAPIKey, 0) + + verifier := iexec.NewVerifier("", models.Verifier(), 0) + + // buildOrch creates a per-skill Orchestrator. Each skill gets its own + // attempt log; the caller is responsible for saving it to the session log. 
+ buildOrch := func(skill, override string) *iexec.Orchestrator { + rawChain := models.ChainFor(skill, override) + chain := make([]iexec.ChainEntry, len(rawChain)) + for i, m := range rawChain { + chain[i] = iexec.EntryFor(m) + } + attempts := make([]iexec.AttemptRecord, 0, len(chain)) + return iexec.NewOrchestrator(chain, litellmExec.Run, claudeExec.Run, verifier, models.LlamaSwapURL(), &attempts) + } + + tierFn := func(ctx context.Context) tier.Info { + return tier.Detect(ctx, "https://api.anthropic.com", cfg.LiteLLMBaseURL) + } + + reg := registry.New() + reg.Register(tdd.New(tdd.Config{ + SystemPrompt: string(systemPrompt), + SkillPrompt: string(tddPrompt), + DefaultModel: models.ChainFor("tdd", "")[0], + ExecutorFn: buildOrch("tdd", "").Run, + SessionsDir: cfg.SessionsDir, + })) + reg.Register(brain.New(brain.Config{ + IngestBaseURL: cfg.IngestBaseURL, + })) + reg.Register(org.New(org.Config{ + TierFn: tierFn, + })) + reg.Register(sessionlog.New(sessionlog.Config{ + SessionsDir: cfg.SessionsDir, + })) + reg.Register(retrospective.New(retrospective.Config{ + SkillPrompt: string(retroPrompt), + DefaultModel: models.ChainFor("retrospective", "")[0], + SessionsDir: cfg.SessionsDir, + ExecutorFn: buildOrch("retrospective", "").Run, + })) + reg.Register(review.New(review.Config{ + SkillPrompt: string(reviewPrompt), + DefaultModel: models.ChainFor("review", "")[0], + ExecutorFn: buildOrch("review", "").Run, + SessionsDir: cfg.SessionsDir, + })) + reg.Register(skilldebug.New(skilldebug.Config{ + SkillPrompt: string(debugPrompt), + DefaultModel: models.ChainFor("debug", "")[0], + ExecutorFn: buildOrch("debug", "").Run, + SessionsDir: cfg.SessionsDir, + })) + reg.Register(spec.New(spec.Config{ + SkillPrompt: string(specPrompt), + DefaultModel: models.ChainFor("spec", "")[0], + ExecutorFn: buildOrch("spec", "").Run, + SessionsDir: cfg.SessionsDir, + })) + reg.Register(trainer.New(trainer.Config{ + ReaderPrompt: string(trainerReaderPrompt), + WriterPrompt: 
string(trainerWriterPrompt), + DefaultModel: models.ChainFor("trainer", "")[0], + ExecutorFn: buildOrch("trainer", "").Run, + SessionsDir: cfg.SessionsDir, + BrainDir: cfg.BrainDir, + })) + + srv := mcp.NewServer(reg) + mux := http.NewServeMux() + mux.Handle("/mcp", srv) + + addr := ":" + cfg.Port + logger.Info("supervisor starting", "addr", addr) + if err := http.ListenAndServe(addr, mux); err != nil { + logger.Error("server stopped", "err", err) + os.Exit(1) + } +} +``` + +- [ ] **Step 3: Build to verify compilation** + +```bash +go build ./... +``` + +Expected: clean build — no errors. + +- [ ] **Step 4: Run all tests** + +```bash +go test ./... +``` + +Expected: all tests pass. + +- [ ] **Step 5: Commit** + +```bash +git add cmd/supervisor/main.go +git commit -m "feat(main): wire per-skill Orchestrators replacing single executor.Run" +``` + +--- + +### Task 8: Ship v0.3.0 + +**Files:** none (CI + tagging only) + +- [ ] **Step 1: Run full check** + +```bash +cd /Users/mathias/Documents/local-dev/AI/supervisor +task check +``` + +Expected: lint + vet + test all pass. + +- [ ] **Step 2: Tag** + +```bash +git tag v0.3.0 -m "feat: model orchestration with per-skill chains and Claude verification" +``` + +- [ ] **Step 3: Push with follow-tags** + +```bash +git push && git push --follow-tags +``` + +Expected: Gitea CI job triggers, tag v0.3.0 pushed. Mirror job should also succeed. + +- [ ] **Step 4: Verify CI** + +Check Gitea CI passes. If the mirror job fails on tag, check that the tag doesn't already exist on GitHub. 
+ +--- + +## Self-review + +**Spec coverage check:** + +| Spec requirement | Covered by | +|-----------------|------------| +| Each skill dispatches to configured local model via LiteLLM | Task 4 (litellm.go) + Task 7 (buildOrch) | +| Claude verifies every local output | Task 5 (verifier.go) + Task 6 orchestrator loop | +| Escalation walks per-skill chain | Task 6 orchestrator.go | +| Every attempt logged (model, tier, duration, warm, verdict) | Task 1 (Attempt struct) + Task 6 (AttemptRecord) | +| Cloud tiers self-certify, no verifier call | Task 6 `if entry.IsCloud` branch | +| Zero changes to skill handlers | Task 7 — handlers untouched, only main.go wired | +| LiteLLMBaseURL already in config; no new env vars beyond LLAMA_SWAP_URL | models.yaml has llama_swap_url; no config.go change needed | +| Caller override collapses to single-entry chain | Task 2 ChainFor override path + tests | +| One attempt per tier before escalating | Task 6 — no retry loop within a tier | + +**Note on LLAMA_SWAP_URL:** The llama-swap URL lives in `models.yaml` (`llama_swap_url: http://koala:8080`), not in an env var. The spec success criterion says "no new env vars required beyond `LLAMA_SWAP_URL`" — this plan interprets that as the URL being config-file-driven, which avoids any new env var entirely. If an env var override is later needed, it can be added to `config.Config` in a follow-up. + +**Note on session logging of AttemptRecord:** The orchestrator collects `AttemptRecord` slices in memory. The session JSONL write (via `session.Append`) happens in the skill handlers — which already append an `Entry` with `Attempts []session.Attempt`. In this plan the `AttemptRecord` type lives in the exec package and `session.Attempt` lives in the session package; they are parallel types. A follow-up could unify them, but the skill handlers will need to translate the orchestrator's records into `session.Attempt` structs. 
Since skill handlers are not changed in this phase (per spec constraint), the translation will need to happen when Phase 4 unifies observability. For now, the orchestrator accumulates records for future use and the existing `session.Attempt{Verified}` field continues to be set by skill handlers as before. diff --git a/docs/superpowers/plans/2026-04-22-phase4-attempt-wiring.md b/docs/superpowers/plans/2026-04-22-phase4-attempt-wiring.md new file mode 100644 index 0000000..898fc84 --- /dev/null +++ b/docs/superpowers/plans/2026-04-22-phase4-attempt-wiring.md @@ -0,0 +1,1073 @@ +# Phase 4: AttemptRecord Wiring + Shared PrependHistory Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Wire orchestrator `AttemptRecord`s into the session JSONL log so every skill invocation records which models ran, verdicts, and timings; simultaneously eliminate the `prependHistory` copy-paste across four skill packages by exporting it from the `session` package. + +**Architecture:** Three changes composed: +1. `session.PrependHistory` — exported function replaces 4 identical private methods. Lives in `internal/session/history.go`. +2. `session.AttemptsFrom` — converter in new `internal/session/attempts.go` that turns `[]exec.AttemptRecord` into `[]session.Attempt`. Introduces `session → exec` dependency (no circular risk). +3. `exec.Result.Attempts` — new `[]AttemptRecord` field populated by `buildOrch` after `orch.Run`. Each skill handler calls `session.Append` after `ExecutorFn` if `session_id` is set. + +**Tech Stack:** Go stdlib, `internal/session`, `internal/exec`, `internal/skills/*`, `cmd/supervisor/main.go`. No new dependencies.
+ +--- + +## File map + +| Action | File | Responsibility | +|--------|------|---------------| +| Modify | `internal/session/history.go` | Add exported `PrependHistory` function | +| Modify | `internal/session/history_test.go` | Add `TestPrependHistory` cases | +| Create | `internal/session/attempts.go` | `AttemptsFrom([]exec.AttemptRecord) []Attempt` | +| Create | `internal/session/attempts_test.go` | Unit tests for `AttemptsFrom` | +| Modify | `internal/exec/result.go` | Add `Attempts []AttemptRecord` field to `Result` | +| Modify | `cmd/supervisor/main.go` | Set `result.Attempts = attempts` in `buildOrch` after `orch.Run` | +| Modify | `internal/skills/review/handlers.go` | Use `session.PrependHistory`, add `session.Append`, remove private method | +| Modify | `internal/skills/debug/handlers.go` | Same | +| Modify | `internal/skills/spec/handlers.go` | Same | +| Modify | `internal/skills/tdd/handlers.go` | Use `session.PrependHistory` for green/refactor, remove private method, add `session.Append` per phase | +| Modify | `internal/skills/retrospective/handlers.go` | Add `session.Append` after `ExecutorFn` | +| Modify | `internal/skills/trainer/handlers.go` | Add `session.Append` after writer agent | + +--- + +## Task 1: Export `PrependHistory` from session package + +**Files:** +- Modify: `internal/session/history.go` +- Modify: `internal/session/history_test.go` + +- [ ] **Step 1: Write failing test** + +Add to `internal/session/history_test.go`: + +```go +func TestPrependHistoryNoSessionID(t *testing.T) { + result := session.PrependHistory("", "", "review", "do the task") + assert.Equal(t, "do the task", result) +} + +func TestPrependHistoryNoLog(t *testing.T) { + dir := t.TempDir() + result := session.PrependHistory(dir, "sess-abc", "review", "do the task") + assert.Equal(t, "do the task", result) +} + +func TestPrependHistoryPrependsHistory(t *testing.T) { + dir := t.TempDir() + entry := session.Entry{ + SessionID: "sess-abc", Skill: "tdd", Phase: "red", + 
FinalStatus: "pass", Message: "wrote test", + Timestamp: time.Now(), + } + require.NoError(t, session.Append(dir, "sess-abc", entry)) + + result := session.PrependHistory(dir, "sess-abc", "review", "do the task") + assert.Contains(t, result, "## Session history") + assert.Contains(t, result, "wrote test") + assert.True(t, strings.HasSuffix(result, "do the task")) +} + +func TestPrependHistoryExcludesCurrentPhase(t *testing.T) { + dir := t.TempDir() + require.NoError(t, session.Append(dir, "sess-abc", session.Entry{ + SessionID: "sess-abc", Skill: "tdd", Phase: "red", + FinalStatus: "pass", Message: "red done", Timestamp: time.Now(), + })) + require.NoError(t, session.Append(dir, "sess-abc", session.Entry{ + SessionID: "sess-abc", Skill: "tdd", Phase: "green", + FinalStatus: "pass", Message: "green done", Timestamp: time.Now(), + })) + + result := session.PrependHistory(dir, "sess-abc", "green", "do the task") + assert.Contains(t, result, "red done") + assert.NotContains(t, result, "green done") +} +``` + +- [ ] **Step 2: Run tests, verify failure** + +```bash +cd internal/session && go test ./... -run TestPrepend -v +``` +Expected: `undefined: session.PrependHistory` (if `undefined: strings` is also reported, add `"strings"` to the test file's imports) + +- [ ] **Step 3: Add `PrependHistory` to `internal/session/history.go`** + +Append after the existing `FormatHistory` function: + +```go +// PrependHistory reads the session log for sessionID and prepends a formatted +// history block to task. Returns task unchanged if sessionID or sessionsDir is +// empty, or if no prior entries exist. +func PrependHistory(sessionsDir, sessionID, currentPhase, task string) string { + if sessionID == "" || sessionsDir == "" { + return task + } + entries, err := Read(sessionsDir, sessionID) + if err != nil || len(entries) == 0 { + return task + } + history := FormatHistory(entries, currentPhase) + if history == "" { + return task + } + return history + "\n---\n\n" + task +} +``` + +- [ ] **Step 4: Run tests, verify pass** + +```bash +cd internal/session && go test ./...
-v +``` +Expected: all pass including the four new `TestPrependHistory*` tests. + +- [ ] **Step 5: Commit** + +```bash +git add internal/session/history.go internal/session/history_test.go +git commit -m "feat(session): export PrependHistory for shared use across skills" +``` + +--- + +## Task 2: Add `AttemptsFrom` converter + +**Files:** +- Create: `internal/session/attempts.go` +- Create: `internal/session/attempts_test.go` + +- [ ] **Step 1: Write failing test** + +Create `internal/session/attempts_test.go`: + +```go +package session_test + +import ( + "testing" + + "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestAttemptsFromEmpty(t *testing.T) { + result := session.AttemptsFrom(nil) + assert.Empty(t, result) +} + +func TestAttemptsFromSetsIndex(t *testing.T) { + records := []exec.AttemptRecord{ + {Model: "ollama/phi4", Tier: "local", DurationMs: 1200, WarmStart: true, Verdict: "escalate", Feedback: "too vague"}, + {Model: "claude-sonnet-4-6", Tier: "subagent", DurationMs: 3400, WarmStart: false, Verdict: "accept"}, + } + result := session.AttemptsFrom(records) + require.Len(t, result, 2) + + assert.Equal(t, 1, result[0].Attempt) + assert.Equal(t, "ollama/phi4", result[0].Model) + assert.Equal(t, "local", result[0].Tier) + assert.Equal(t, int64(1200), result[0].DurationMs) + assert.True(t, result[0].WarmStart) + assert.Equal(t, "escalate", result[0].Verdict) + assert.Equal(t, "too vague", result[0].Feedback) + assert.False(t, result[0].Verified) + + assert.Equal(t, 2, result[1].Attempt) + assert.Equal(t, "claude-sonnet-4-6", result[1].Model) + assert.True(t, result[1].Verified) +} +``` + +- [ ] **Step 2: Run test, verify failure** + +```bash +cd internal/session && go test ./... 
-run TestAttemptsFrom -v +``` +Expected: `undefined: session.AttemptsFrom` + +- [ ] **Step 3: Create `internal/session/attempts.go`** + +```go +// internal/session/attempts.go +package session + +import iexec "github.com/mathiasbq/supervisor/internal/exec" + +// AttemptsFrom converts exec.AttemptRecord slice to session.Attempt slice +// for writing into a session JSONL entry. +func AttemptsFrom(records []iexec.AttemptRecord) []Attempt { + if len(records) == 0 { + return nil + } + out := make([]Attempt, len(records)) + for i, r := range records { + out[i] = Attempt{ + Attempt: i + 1, + Model: r.Model, + Tier: r.Tier, + DurationMs: r.DurationMs, + WarmStart: r.WarmStart, + Verdict: r.Verdict, + Feedback: r.Feedback, + Verified: r.Verdict == "accept", + } + } + return out +} +``` + +- [ ] **Step 4: Run tests, verify pass** + +```bash +cd internal/session && go test ./... -v +``` +Expected: all pass. + +- [ ] **Step 5: Commit** + +```bash +git add internal/session/attempts.go internal/session/attempts_test.go +git commit -m "feat(session): add AttemptsFrom converter for exec.AttemptRecord" +``` + +--- + +## Task 3: Add `Attempts` field to `exec.Result` and wire in `buildOrch` + +**Files:** +- Modify: `internal/exec/result.go` +- Modify: `cmd/supervisor/main.go` + +- [ ] **Step 1: Add `Attempts` to `exec.Result`** + +In `internal/exec/result.go`, add one field to the `Result` struct after `Message`: + +```go +type Result struct { + Status string `json:"status"` + Phase string `json:"phase"` + Skill string `json:"skill"` + FilePath string `json:"file_path"` + RunnerOutput string `json:"runner_output"` + Verified bool `json:"verified"` + ModelUsed string `json:"model_used"` + Message string `json:"message"` + Attempts []AttemptRecord `json:"attempts,omitempty"` // populated by orchestrator, not Claude +} +``` + +- [ ] **Step 2: Run tests, verify no regressions** + +```bash +go test ./internal/exec/... 
-v +``` +Expected: all existing tests pass (adding a field is backward-compatible). + +- [ ] **Step 3: Wire attempts into `buildOrch` in `cmd/supervisor/main.go`** + +Find the `buildOrch` closure and add one line after `orch.Run`: + +```go +buildOrch := func(skill string) func(ctx context.Context, req iexec.Request) (iexec.Result, error) { + return func(ctx context.Context, req iexec.Request) (iexec.Result, error) { + rawChain := models.ChainFor(skill, req.Model) + chain := make([]iexec.ChainEntry, len(rawChain)) + for i, m := range rawChain { + chain[i] = iexec.EntryFor(m) + } + attempts := make([]iexec.AttemptRecord, 0, len(chain)) + orch := iexec.NewOrchestrator(chain, litellmExec.Run, claudeExec.Run, verifier, models.LlamaSwapURL(), &attempts) + result, err := orch.Run(ctx, req) + result.Attempts = attempts // attach orchestration metadata before returning + return result, err + } +} +``` + +- [ ] **Step 4: Build to verify no compile errors** + +```bash +go build ./... +``` +Expected: clean build. + +- [ ] **Step 5: Commit** + +```bash +git add internal/exec/result.go cmd/supervisor/main.go +git commit -m "feat(exec): surface AttemptRecord slice on Result for session logging" +``` + +--- + +## Task 4: Update `review` and `debug` skill handlers + +**Files:** +- Modify: `internal/skills/review/handlers.go` +- Modify: `internal/skills/debug/handlers.go` + +- [ ] **Step 1: Rewrite `review/handlers.go`** + +```go +// internal/skills/review/handlers.go +package review + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "time" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" +) + +type reviewArgs struct { + ProjectRoot string `json:"project_root"` + Files []string `json:"files"` + Context string `json:"context"` + Model string `json:"model"` + SessionID string `json:"session_id"` +} + +// Handle dispatches the MCP tool call to the appropriate handler. 
+func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "review" { + return nil, fmt.Errorf("unknown tool: %s", tool) + } + var a reviewArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.ProjectRoot == "" { + return nil, fmt.Errorf("project_root is required") + } + if len(a.Files) == 0 { + return nil, fmt.Errorf("files is required") + } + + model := a.Model + if model == "" { + model = s.cfg.DefaultModel + } + + task := fmt.Sprintf( + "phase: review\nproject_root: %s\nfiles: %s\ncontext: %s\nmodel: %s", + a.ProjectRoot, strings.Join(a.Files, ", "), a.Context, model, + ) + task = session.PrependHistory(s.cfg.SessionsDir, a.SessionID, "review", task) + + if s.cfg.ExecutorFn == nil { + return nil, fmt.Errorf("no executor configured") + } + t0 := time.Now() + result, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.SkillPrompt, + TaskPrompt: task, + Model: model, + Tools: "Read,Bash", + }) + if err != nil { + return nil, err + } + + if a.SessionID != "" && s.cfg.SessionsDir != "" { + _ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{ + SessionID: a.SessionID, + Timestamp: time.Now(), + Skill: "review", + Phase: "review", + ProjectRoot: a.ProjectRoot, + Attempts: session.AttemptsFrom(result.Attempts), + FinalStatus: result.Status, + FilePath: result.FilePath, + ModelUsed: result.ModelUsed, + DurationMs: time.Since(t0).Milliseconds(), + Message: result.Message, + }) + } + + b, err := json.Marshal(result) + if err != nil { + return nil, fmt.Errorf("marshal result: %w", err) + } + return b, nil +} +``` + +- [ ] **Step 2: Rewrite `debug/handlers.go`** + +```go +// internal/skills/debug/handlers.go +package debug + +import ( + "context" + "encoding/json" + "fmt" + "time" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" +) + +type debugArgs struct { + 
ProjectRoot string `json:"project_root"` + Error string `json:"error"` + Context string `json:"context"` + Model string `json:"model"` + SessionID string `json:"session_id"` +} + +// Handle dispatches the MCP tool call to the appropriate handler. +func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "debug" { + return nil, fmt.Errorf("unknown tool: %s", tool) + } + var a debugArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.ProjectRoot == "" { + return nil, fmt.Errorf("project_root is required") + } + if a.Error == "" { + return nil, fmt.Errorf("error is required") + } + + model := a.Model + if model == "" { + model = s.cfg.DefaultModel + } + + task := fmt.Sprintf( + "phase: debug\nproject_root: %s\nerror: %s\ncontext: %s\nmodel: %s", + a.ProjectRoot, a.Error, a.Context, model, + ) + task = session.PrependHistory(s.cfg.SessionsDir, a.SessionID, "debug", task) + + if s.cfg.ExecutorFn == nil { + return nil, fmt.Errorf("no executor configured") + } + t0 := time.Now() + result, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.SkillPrompt, + TaskPrompt: task, + Model: model, + Tools: "Read,Bash", + }) + if err != nil { + return nil, err + } + + if a.SessionID != "" && s.cfg.SessionsDir != "" { + _ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{ + SessionID: a.SessionID, + Timestamp: time.Now(), + Skill: "debug", + Phase: "debug", + ProjectRoot: a.ProjectRoot, + Attempts: session.AttemptsFrom(result.Attempts), + FinalStatus: result.Status, + ModelUsed: result.ModelUsed, + DurationMs: time.Since(t0).Milliseconds(), + Message: result.Message, + }) + } + + b, err := json.Marshal(result) + if err != nil { + return nil, fmt.Errorf("marshal result: %w", err) + } + return b, nil +} +``` + +- [ ] **Step 3: Build and test** + +```bash +go build ./... && go test ./internal/skills/review/... ./internal/skills/debug/... 
-v +``` +Expected: clean build and all existing tests pass. + +- [ ] **Step 4: Commit** + +```bash +git add internal/skills/review/handlers.go internal/skills/debug/handlers.go +git commit -m "feat(skills): wire session.Append and PrependHistory into review and debug" +``` + +--- + +## Task 5: Update `spec` skill handler + +**Files:** +- Modify: `internal/skills/spec/handlers.go` + +- [ ] **Step 1: Rewrite `spec/handlers.go`** + +```go +// internal/skills/spec/handlers.go +package spec + +import ( + "context" + "encoding/json" + "fmt" + "time" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" +) + +type specArgs struct { + ProjectRoot string `json:"project_root"` + Requirements string `json:"requirements"` + OutputPath string `json:"output_path"` + Context string `json:"context"` + Model string `json:"model"` + SessionID string `json:"session_id"` +} + +// Handle dispatches the MCP tool call to the appropriate handler. +func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "spec" { + return nil, fmt.Errorf("unknown tool: %s", tool) + } + var a specArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.ProjectRoot == "" { + return nil, fmt.Errorf("project_root is required") + } + if a.Requirements == "" { + return nil, fmt.Errorf("requirements is required") + } + outputPath := a.OutputPath + if outputPath == "" { + outputPath = "docs/spec.md" + } + + model := a.Model + if model == "" { + model = s.cfg.DefaultModel + } + + task := fmt.Sprintf( + "phase: spec\nproject_root: %s\nrequirements: %s\noutput_path: %s\ncontext: %s\nmodel: %s", + a.ProjectRoot, a.Requirements, outputPath, a.Context, model, + ) + task = session.PrependHistory(s.cfg.SessionsDir, a.SessionID, "spec", task) + + if s.cfg.ExecutorFn == nil { + return nil, fmt.Errorf("no executor configured") + } + t0 := time.Now() + 
result, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.SkillPrompt, + TaskPrompt: task, + Model: model, + Tools: "Read,Write", + }) + if err != nil { + return nil, err + } + + if a.SessionID != "" && s.cfg.SessionsDir != "" { + _ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{ + SessionID: a.SessionID, + Timestamp: time.Now(), + Skill: "spec", + Phase: "spec", + ProjectRoot: a.ProjectRoot, + Attempts: session.AttemptsFrom(result.Attempts), + FinalStatus: result.Status, + FilePath: result.FilePath, + ModelUsed: result.ModelUsed, + DurationMs: time.Since(t0).Milliseconds(), + Message: result.Message, + }) + } + + b, err := json.Marshal(result) + if err != nil { + return nil, fmt.Errorf("marshal result: %w", err) + } + return b, nil +} +``` + +- [ ] **Step 2: Build and test** + +```bash +go build ./... && go test ./internal/skills/spec/... -v +``` +Expected: clean build, all tests pass. + +- [ ] **Step 3: Commit** + +```bash +git add internal/skills/spec/handlers.go +git commit -m "feat(skills): wire session.Append and PrependHistory into spec" +``` + +--- + +## Task 6: Update `tdd` skill handler + +**Files:** +- Modify: `internal/skills/tdd/handlers.go` + +- [ ] **Step 1: Rewrite `tdd/handlers.go`** + +```go +package tdd + +import ( + "context" + "encoding/json" + "fmt" + "time" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" +) + +func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + switch tool { + case "tdd_red": + return s.handleRed(ctx, args) + case "tdd_green": + return s.handleGreen(ctx, args) + case "tdd_refactor": + return s.handleRefactor(ctx, args) + default: + return nil, fmt.Errorf("unknown tool: %s", tool) + } +} + +type redArgs struct { + ProjectRoot string `json:"project_root"` + Spec string `json:"spec"` + Model string `json:"model"` + TestCmd string `json:"test_cmd"` +} + +func (s *Skill) 
handleRed(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) { + var args redArgs + if err := json.Unmarshal(raw, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if args.ProjectRoot == "" { + return nil, fmt.Errorf("project_root is required") + } + if args.Spec == "" { + return nil, fmt.Errorf("spec is required") + } + task := fmt.Sprintf( + "phase: red\nproject_root: %s\nspec: %s\nmodel: %s\ntest_cmd: %s", + args.ProjectRoot, args.Spec, s.resolveModel(args.Model), args.TestCmd, + ) + return s.execute(ctx, task) +} + +type greenArgs struct { + ProjectRoot string `json:"project_root"` + TestPath string `json:"test_path"` + Model string `json:"model"` + TestCmd string `json:"test_cmd"` + SessionID string `json:"session_id"` +} + +func (s *Skill) handleGreen(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) { + var args greenArgs + if err := json.Unmarshal(raw, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if args.ProjectRoot == "" { + return nil, fmt.Errorf("project_root is required") + } + if args.TestPath == "" { + return nil, fmt.Errorf("test_path is required") + } + task := fmt.Sprintf( + "phase: green\nproject_root: %s\ntest_path: %s\nmodel: %s\ntest_cmd: %s", + args.ProjectRoot, args.TestPath, s.resolveModel(args.Model), args.TestCmd, + ) + task = session.PrependHistory(s.cfg.SessionsDir, args.SessionID, "green", task) + + t0 := time.Now() + result, err := s.execute(ctx, task) + if err != nil { + return nil, err + } + s.logAttempt(args.SessionID, args.ProjectRoot, "tdd", "green", t0, result) + return result, nil +} + +type refactorArgs struct { + ProjectRoot string `json:"project_root"` + TestPath string `json:"test_path"` + ImplPath string `json:"impl_path"` + Model string `json:"model"` + TestCmd string `json:"test_cmd"` + SessionID string `json:"session_id"` +} + +func (s *Skill) handleRefactor(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) { + var 
args refactorArgs + if err := json.Unmarshal(raw, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if args.ProjectRoot == "" { + return nil, fmt.Errorf("project_root is required") + } + if args.TestPath == "" { + return nil, fmt.Errorf("test_path is required") + } + if args.ImplPath == "" { + return nil, fmt.Errorf("impl_path is required") + } + task := fmt.Sprintf( + "phase: refactor\nproject_root: %s\ntest_path: %s\nimpl_path: %s\nmodel: %s\ntest_cmd: %s", + args.ProjectRoot, args.TestPath, args.ImplPath, s.resolveModel(args.Model), args.TestCmd, + ) + task = session.PrependHistory(s.cfg.SessionsDir, args.SessionID, "refactor", task) + + t0 := time.Now() + result, err := s.execute(ctx, task) + if err != nil { + return nil, err + } + s.logAttempt(args.SessionID, args.ProjectRoot, "tdd", "refactor", t0, result) + return result, nil +} + +func (s *Skill) resolveModel(override string) string { + if override != "" { + return override + } + return s.cfg.DefaultModel +} + +// execute calls ExecutorFn and returns the marshaled result. +func (s *Skill) execute(ctx context.Context, task string) (json.RawMessage, error) { + if s.cfg.ExecutorFn == nil { + return nil, fmt.Errorf("no executor configured") + } + req := iexec.Request{ + SkillPrompt: s.cfg.SkillPrompt, + TaskPrompt: task, + } + result, err := s.cfg.ExecutorFn(ctx, req) + if err != nil { + return nil, err + } + return json.Marshal(result) +} + +// logAttempt writes a session.Entry for a completed phase if session_id is set. +// raw is the marshaled Result returned by execute; we unmarshal to extract fields. 
+func (s *Skill) logAttempt(sessionID, projectRoot, skill, phase string, t0 time.Time, raw json.RawMessage) { + if sessionID == "" || s.cfg.SessionsDir == "" { + return + } + var result iexec.Result + if err := json.Unmarshal(raw, &result); err != nil { + return + } + _ = session.Append(s.cfg.SessionsDir, sessionID, session.Entry{ + SessionID: sessionID, + Timestamp: time.Now(), + Skill: skill, + Phase: phase, + ProjectRoot: projectRoot, + Attempts: session.AttemptsFrom(result.Attempts), + FinalStatus: result.Status, + FilePath: result.FilePath, + ModelUsed: result.ModelUsed, + DurationMs: time.Since(t0).Milliseconds(), + Message: result.Message, + }) +} +``` + +- [ ] **Step 2: Build and test** + +```bash +go build ./... && go test ./internal/skills/tdd/... -v +``` +Expected: clean build, all existing tests pass. + +- [ ] **Step 3: Commit** + +```bash +git add internal/skills/tdd/handlers.go +git commit -m "feat(skills): wire session.Append and PrependHistory into tdd" +``` + +--- + +## Task 7: Update `retrospective` and `trainer` handlers + +**Files:** +- Modify: `internal/skills/retrospective/handlers.go` +- Modify: `internal/skills/trainer/handlers.go` + +- [ ] **Step 1: Update `retrospective/handlers.go`** — add `session.Append` after `ExecutorFn`: + +```go +// internal/skills/retrospective/handlers.go +package retrospective + +import ( + "context" + "encoding/json" + "fmt" + "time" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" +) + +type retroArgs struct { + SessionID string `json:"session_id"` + Model string `json:"model,omitempty"` +} + +// Handle dispatches the retrospective tool call. 
+func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "retrospective" { + return nil, fmt.Errorf("unknown retrospective tool: %s", tool) + } + var a retroArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.SessionID == "" { + return nil, fmt.Errorf("session_id is required") + } + + model := a.Model + if model == "" { + model = s.cfg.DefaultModel + } + + entries, err := session.Read(s.cfg.SessionsDir, a.SessionID) + if err != nil { + return nil, fmt.Errorf("read session log: %w", err) + } + + logJSON, err := json.MarshalIndent(entries, "", " ") + if err != nil { + return nil, fmt.Errorf("marshal session log: %w", err) + } + + taskPrompt := fmt.Sprintf( + "SESSION_ID: %s\n\nSESSION_LOG:\n%s\n\nReview this session log. Identify what is novel or worth preserving as organizational knowledge. Write structured entries to brain/raw/ via brain_write. Return JSON result when done.", + a.SessionID, string(logJSON), + ) + + if s.cfg.ExecutorFn == nil { + return nil, fmt.Errorf("no executor configured") + } + t0 := time.Now() + result, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.SkillPrompt, + TaskPrompt: taskPrompt, + Model: model, + Tools: "Bash,Read,Write", + }) + if err != nil { + return nil, fmt.Errorf("retrospective worker: %w", err) + } + + _ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{ + SessionID: a.SessionID, + Timestamp: time.Now(), + Skill: "retrospective", + Phase: "retrospective", + Attempts: session.AttemptsFrom(result.Attempts), + FinalStatus: result.Status, + ModelUsed: result.ModelUsed, + DurationMs: time.Since(t0).Milliseconds(), + Message: result.Message, + }) + + b, err := json.Marshal(result) + if err != nil { + return nil, fmt.Errorf("marshal result: %w", err) + } + return b, nil +} +``` + +- [ ] **Step 2: Update `trainer/handlers.go`** — add `session.Append` after writer agent: + +```go 
+// internal/skills/trainer/handlers.go +package trainer + +import ( + "context" + "encoding/json" + "fmt" + "time" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" +) + +type trainArgs struct { + SessionID string `json:"session_id"` + Model string `json:"model"` +} + +// Handle dispatches the MCP tool call to the trainer handler. +func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "trainer" { + return nil, fmt.Errorf("unknown tool: %s", tool) + } + var a trainArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.SessionID == "" { + return nil, fmt.Errorf("session_id is required") + } + if s.cfg.ExecutorFn == nil { + return nil, fmt.Errorf("no executor configured") + } + + model := a.Model + if model == "" { + model = s.cfg.DefaultModel + } + + entries, err := session.Read(s.cfg.SessionsDir, a.SessionID) + if err != nil { + return nil, fmt.Errorf("read session log: %w", err) + } + + // ── Step 1: Reader agent ───────────────────────────────────────────────── + history := session.FormatHistory(entries, "") + readerTask := fmt.Sprintf( + "role: reader\nsession_id: %s\nbrain_dir: %s\n\n%s", + a.SessionID, s.cfg.BrainDir, history, + ) + readerResult, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.ReaderPrompt, + TaskPrompt: readerTask, + Model: model, + Tools: "Read", + }) + if err != nil { + return nil, fmt.Errorf("reader agent: %w", err) + } + + // ── Step 2: Writer agent (receives reader candidates) ──────────────────── + t0 := time.Now() + writerTask := fmt.Sprintf( + "role: writer\nsession_id: %s\nbrain_dir: %s\n\nreader_summary: %s\nreader_candidates:\n%s", + a.SessionID, s.cfg.BrainDir, readerResult.Message, readerResult.RunnerOutput, + ) + writerResult, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.WriterPrompt, + TaskPrompt: writerTask, 
+ Model: model, + Tools: "Read,Write", + }) + if err != nil { + return nil, fmt.Errorf("writer agent: %w", err) + } + + _ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{ + SessionID: a.SessionID, + Timestamp: time.Now(), + Skill: "trainer", + Phase: "trainer", + Attempts: session.AttemptsFrom(writerResult.Attempts), + FinalStatus: writerResult.Status, + ModelUsed: writerResult.ModelUsed, + DurationMs: time.Since(t0).Milliseconds(), + Message: writerResult.Message, + }) + + b, err := json.Marshal(writerResult) + if err != nil { + return nil, fmt.Errorf("marshal result: %w", err) + } + return b, nil +} +``` + +- [ ] **Step 3: Build and test** + +```bash +go build ./... && go test ./internal/skills/retrospective/... ./internal/skills/trainer/... -v +``` +Expected: clean build, all tests pass. + +- [ ] **Step 4: Commit** + +```bash +git add internal/skills/retrospective/handlers.go internal/skills/trainer/handlers.go +git commit -m "feat(skills): wire session.Append into retrospective and trainer" +``` + +--- + +## Task 8: Full test suite + push + verify deployment + +- [ ] **Step 1: Run full test suite** + +```bash +go test ./... -v 2>&1 | tail -30 +``` +Expected: all packages pass, no failures. + +- [ ] **Step 2: Run linter** + +```bash +task check +``` +Expected: clean. 
+ +- [ ] **Step 3: Push to trigger CD** + +```bash +git push +``` + +- [ ] **Step 4: Watch CD pipeline** + +```bash +# Lists the two most recent runs; re-run until the newest one reports a conclusion (export GITEA_TOKEN first): +curl -s "https://gitea.d-ma.be/api/v1/repos/mathias/hyperguild/actions/runs?limit=2" \ + -H "Authorization: token $GITEA_TOKEN" \ + | python3 -c "import sys,json; [print(r['id'],r['status'],r.get('conclusion','—')) for r in json.load(sys.stdin)['workflow_runs']]" +``` + +- [ ] **Step 5: Verify pod rolled to new image** + +```bash +ssh koala "kubectl get pod -n supervisor -o wide && kubectl logs -n supervisor deployment/supervisor --tail=3" +``` +Expected: new pod SHA in image tag, `supervisor starting` log line. + +- [ ] **Step 6: Smoke-test MCP responds** + +```bash +ssh koala "curl -s -X POST http://10.43.197.185:3200/mcp \ + -H 'Content-Type: application/json' \ + -d '{\"jsonrpc\":\"2.0\",\"method\":\"tools/list\",\"params\":{},\"id\":1}' \ + | python3 -c \"import sys,json; tools=json.load(sys.stdin)['result']['tools']; print(len(tools), 'tools OK')\"" +``` +Expected: `12 tools OK` diff --git a/ingestion/internal/api/handler.go b/ingestion/internal/api/handler.go index ff26483..98cc784 100644 --- a/ingestion/internal/api/handler.go +++ b/ingestion/internal/api/handler.go @@ -99,7 +99,11 @@ func (h *Handler) Write(w http.ResponseWriter, r *http.Request) { finalContent = fm.String() + req.Content } - dest := filepath.Join(rawDir, filepath.Base(filename)) + base := filepath.Base(filename) + if !strings.HasSuffix(base, ".md") { + base += ".md" + } + dest := filepath.Join(rawDir, base) if err := os.WriteFile(dest, []byte(finalContent), 0o644); err != nil { h.logger.Error("write failed", "err", err) http.Error(w, "write error", http.StatusInternalServerError)