From 7d108b2911b93d74b1390250a2c3f0506f7327bd Mon Sep 17 00:00:00 2001 From: Mathias Bergqvist Date: Fri, 17 Apr 2026 06:56:48 +0200 Subject: [PATCH] docs: add supervisor MCP server implementation plan 12 TDD tasks covering: Go module, Result type, Config, Models, Executor (claude --print), Registry, MCP server, TDD skill, main wiring, config files, Taskfile/MCP registration, smoke test. Co-Authored-By: Claude Sonnet 4.6 --- .../plans/2026-04-16-supervisor-mcp-server.md | 1813 +++++++++++++++++ 1 file changed, 1813 insertions(+) create mode 100644 docs/superpowers/plans/2026-04-16-supervisor-mcp-server.md diff --git a/docs/superpowers/plans/2026-04-16-supervisor-mcp-server.md b/docs/superpowers/plans/2026-04-16-supervisor-mcp-server.md new file mode 100644 index 0000000..d9e4492 --- /dev/null +++ b/docs/superpowers/plans/2026-04-16-supervisor-mcp-server.md @@ -0,0 +1,1813 @@ +# Supervisor MCP Server Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Build a Go MCP server that exposes TDD skill workers as tools Claude Code can call natively, with a Claude supervisor instance handling orchestration and verification. + +**Architecture:** A thin Go HTTP server routes MCP tool calls to skill handlers. Each handler spawns `claude --print` with `--bare --system-prompt --json-schema --permission-mode bypassPermissions`, injecting supervisor rules and skill discipline. The spawned Claude instance uses its own Bash/Read/Write tools to run tests and generate code, returning a structured JSON result. LiteLLM on iguana handles Ollama model delegation. 
+ +**Tech Stack:** Go 1.26, net/http (stdlib), gopkg.in/yaml.v3, testify, claude CLI, LiteLLM + +--- + +## File Map + +| File | Responsibility | +|------|---------------| +| `go.mod` | Module definition | +| `cmd/supervisor/main.go` | Entry point, wires all dependencies | +| `internal/config/config.go` | Env vars → typed Config struct | +| `internal/config/config_test.go` | Config parsing tests | +| `internal/config/models.go` | YAML model routing table | +| `internal/config/models_test.go` | Model resolution tests | +| `internal/exec/result.go` | Result struct + JSON schema constant | +| `internal/exec/result_test.go` | JSON parsing/validation tests | +| `internal/exec/executor.go` | Spawn claude, capture output, parse result | +| `internal/exec/executor_test.go` | Executor tests using fake binary | +| `internal/registry/registry.go` | Skill interface + registry | +| `internal/registry/registry_test.go` | Registry routing tests | +| `internal/mcp/server.go` | HTTP/JSON-RPC MCP server | +| `internal/mcp/server_test.go` | MCP protocol tests | +| `internal/skills/tdd/skill.go` | Tool definitions (MCP schema) | +| `internal/skills/tdd/handlers.go` | tdd_red, tdd_green, tdd_refactor | +| `internal/skills/tdd/handlers_test.go` | Handler prompt-building tests | +| `config/supervisor/CLAUDE.md` | Supervisor orchestration rules | +| `config/supervisor/tdd.md` | TDD iron law (injected per call) | +| `config/models.yaml` | Per-skill model routing table | +| `.env.example` | Documented env vars | +| `Taskfile.yml` | Add `supervisor:dev` and `supervisor:build` tasks | +| `.context/mcp.json` | Register supervisor as MCP server | + +--- + +## Task 1: Go module + project skeleton + +**Files:** +- Create: `go.mod` +- Create: `cmd/supervisor/main.go` + +- [ ] **Step 1: Write a failing test to verify the binary compiles and exits cleanly** + +Create `cmd/supervisor/main_test.go`: +```go +package main + +import ( + "os/exec" + "testing" +) + +func TestBinaryCompiles(t *testing.T) { + 
cmd := exec.Command("go", "build", "./...") + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("build failed: %s\n%s", err, out) + } +} +``` + +- [ ] **Step 2: Run test — expect it to fail (no go.mod yet)** + +```bash +go test ./... +``` +Expected: error — no go.mod + +- [ ] **Step 3: Initialize the module** + +```bash +go mod init github.com/mathiasbq/supervisor +``` + +- [ ] **Step 4: Create `cmd/supervisor/main.go`** + +```go +package main + +import ( + "fmt" + "os" +) + +func main() { + fmt.Fprintln(os.Stderr, "supervisor: not yet implemented") + os.Exit(1) +} +``` + +- [ ] **Step 5: Run test — expect it to pass** + +```bash +go test ./cmd/supervisor/... +``` +Expected: PASS + +- [ ] **Step 6: Commit** + +```bash +git add go.mod cmd/ +git commit -m "chore: initialize go module and cmd skeleton" +``` + +--- + +## Task 2: Result type + +**Files:** +- Create: `internal/exec/result.go` +- Create: `internal/exec/result_test.go` + +- [ ] **Step 1: Write failing tests for result parsing** + +Create `internal/exec/result_test.go`: +```go +package exec_test + +import ( + "encoding/json" + "testing" + + "github.com/mathiasbq/supervisor/internal/exec" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestResultParsesValidJSON(t *testing.T) { + raw := `{ + "status": "pass", + "phase": "red", + "skill": "tdd", + "file_path": "/tmp/foo_test.go", + "runner_output": "--- FAIL: TestFoo", + "verified": true, + "model_used": "self", + "message": "test fails as expected" + }` + var r exec.Result + require.NoError(t, json.Unmarshal([]byte(raw), &r)) + assert.Equal(t, "pass", r.Status) + assert.Equal(t, "red", r.Phase) + assert.True(t, r.Verified) +} + +func TestResultValidation(t *testing.T) { + tests := []struct { + name string + result exec.Result + wantErr bool + }{ + { + name: "valid pass result", + result: exec.Result{ + Status: "pass", Phase: "red", Skill: "tdd", + FilePath: "/tmp/x_test.go", RunnerOutput: "FAIL", + 
// Result is the structured JSON document returned by every supervisor
// invocation. The Schema constant below is handed to claude via
// --json-schema so the model's output is constrained to this shape.
type Result struct {
	Status       string `json:"status"`        // one of: pass, fail, error
	Phase        string `json:"phase"`         // one of: red, green, refactor
	Skill        string `json:"skill"`         // producing skill, e.g. tdd or review
	FilePath     string `json:"file_path"`     // absolute path of the generated file
	RunnerOutput string `json:"runner_output"` // raw stdout+stderr of the test runner
	Verified     bool   `json:"verified"`      // taken from the exit code, never self-reported
	ModelUsed    string `json:"model_used"`    // model name, or "self"
	Message      string `json:"message"`       // one-sentence summary
}

// Lookup tables for the closed sets of values Validate accepts.
var (
	validStatuses = map[string]bool{"pass": true, "fail": true, "error": true}
	validPhases   = map[string]bool{"red": true, "green": true, "refactor": true}
)

// Validate checks Status, Phase and Skill and reports every violation at
// once, joined with "; ". It returns nil when all three fields are acceptable.
func (r Result) Validate() error {
	problems := make([]string, 0, 3)

	if ok := validStatuses[r.Status]; !ok {
		problems = append(problems, "status must be pass|fail|error, got: "+r.Status)
	}
	if ok := validPhases[r.Phase]; !ok {
		problems = append(problems, "phase must be red|green|refactor, got: "+r.Phase)
	}
	if len(r.Skill) == 0 {
		problems = append(problems, "skill is required")
	}

	if len(problems) == 0 {
		return nil
	}
	return errors.New(strings.Join(problems, "; "))
}

// Schema is the JSON Schema given to claude --json-schema to enforce
// structured output matching Result.
const Schema = `{
  "type": "object",
  "required": ["status","phase","skill","file_path","runner_output","verified","model_used","message"],
  "properties": {
    "status": {"type": "string", "enum": ["pass","fail","error"]},
    "phase": {"type": "string", "enum": ["red","green","refactor"]},
    "skill": {"type": "string"},
    "file_path": {"type": "string"},
    "runner_output": {"type": "string"},
    "verified": {"type": "boolean"},
    "model_used": {"type": "string"},
    "message": {"type": "string"}
  }
}`
+``` +Expected: PASS + +- [ ] **Step 6: Commit** + +```bash +git add internal/exec/result.go internal/exec/result_test.go +git commit -m "feat: add Result type with JSON schema and validation" +``` + +--- + +## Task 3: Config package + +**Files:** +- Create: `internal/config/config.go` +- Create: `internal/config/config_test.go` + +- [ ] **Step 1: Write failing tests** + +Create `internal/config/config_test.go`: +```go +package config_test + +import ( + "testing" + + "github.com/mathiasbq/supervisor/internal/config" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestLoadDefaults(t *testing.T) { + t.Setenv("SUPERVISOR_PORT", "") + t.Setenv("LITELLM_BASE_URL", "") + t.Setenv("LITELLM_API_KEY", "") + t.Setenv("SUPERVISOR_CONFIG_DIR", "") + + cfg, err := config.Load() + require.NoError(t, err) + assert.Equal(t, "3200", cfg.Port) + assert.Equal(t, "http://iguana:4000", cfg.LiteLLMBaseURL) + assert.Equal(t, "./config/supervisor", cfg.ConfigDir) +} + +func TestLoadFromEnv(t *testing.T) { + t.Setenv("SUPERVISOR_PORT", "4000") + t.Setenv("LITELLM_BASE_URL", "http://localhost:4000") + t.Setenv("LITELLM_API_KEY", "test-key") + t.Setenv("SUPERVISOR_CONFIG_DIR", "/etc/supervisor") + + cfg, err := config.Load() + require.NoError(t, err) + assert.Equal(t, "4000", cfg.Port) + assert.Equal(t, "http://localhost:4000", cfg.LiteLLMBaseURL) + assert.Equal(t, "test-key", cfg.LiteLLMAPIKey) + assert.Equal(t, "/etc/supervisor", cfg.ConfigDir) +} +``` + +- [ ] **Step 2: Run — expect FAIL** + +```bash +go test ./internal/config/... 
// Config carries the supervisor's runtime settings. Every field is read from
// an environment variable; all but the API key have a built-in fallback.
type Config struct {
	Port           string // SUPERVISOR_PORT; falls back to "3200"
	LiteLLMBaseURL string // LITELLM_BASE_URL; falls back to "http://iguana:4000"
	LiteLLMAPIKey  string // LITELLM_API_KEY; empty when unset
	ConfigDir      string // SUPERVISOR_CONFIG_DIR; falls back to "./config/supervisor"
	ModelsFile     string // SUPERVISOR_MODELS_FILE; falls back to <ConfigDir>/../models.yaml
}

// Load builds a Config from the process environment. The returned error is
// always nil in this implementation.
func Load() (Config, error) {
	var cfg Config

	cfg.Port = envOr("SUPERVISOR_PORT", "3200")
	cfg.LiteLLMBaseURL = envOr("LITELLM_BASE_URL", "http://iguana:4000")
	cfg.LiteLLMAPIKey = os.Getenv("LITELLM_API_KEY")
	cfg.ConfigDir = envOr("SUPERVISOR_CONFIG_DIR", "./config/supervisor")

	// The models file default is derived from ConfigDir, so it is resolved last.
	cfg.ModelsFile = envOr("SUPERVISOR_MODELS_FILE", cfg.ConfigDir+"/../models.yaml")

	return cfg, nil
}

// envOr returns the value of the environment variable key, or def when the
// variable is unset or set to the empty string.
func envOr(key, def string) string {
	value := os.Getenv(key)
	if value == "" {
		return def
	}
	return value
}
+``` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add internal/config/ +git commit -m "feat: add config package with env-var loading" +``` + +--- + +## Task 4: Models config + +**Files:** +- Create: `internal/config/models.go` +- Create: `internal/config/models_test.go` +- Create: `config/models.yaml` + +- [ ] **Step 1: Write failing tests** + +Create `internal/config/models_test.go` (new file): +```go +package config_test + +import ( + "os" + "path/filepath" + "testing" + + "github.com/mathiasbq/supervisor/internal/config" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestModelsResolve(t *testing.T) { + yaml := ` +default: ollama/default-model +skills: + tdd: ollama/qwen3-coder-30b-tuned + review: ollama/devstral-tuned +` + f := filepath.Join(t.TempDir(), "models.yaml") + require.NoError(t, os.WriteFile(f, []byte(yaml), 0644)) + + m, err := config.LoadModels(f) + require.NoError(t, err) + + assert.Equal(t, "ollama/qwen3-coder-30b-tuned", m.Resolve("tdd", "")) + assert.Equal(t, "ollama/devstral-tuned", m.Resolve("review", "")) + assert.Equal(t, "ollama/default-model", m.Resolve("unknown", "")) +} + +func TestModelsOverride(t *testing.T) { + yaml := ` +default: ollama/default-model +skills: + tdd: ollama/qwen3-coder-30b-tuned +` + f := filepath.Join(t.TempDir(), "models.yaml") + require.NoError(t, os.WriteFile(f, []byte(yaml), 0644)) + + m, err := config.LoadModels(f) + require.NoError(t, err) + + // caller override wins over skill default + assert.Equal(t, "anthropic/claude-sonnet-4-6", m.Resolve("tdd", "anthropic/claude-sonnet-4-6")) +} +``` + +- [ ] **Step 2: Run — expect FAIL** + +```bash +go test ./internal/config/... 
+``` +Expected: FAIL — Models type undefined + +- [ ] **Step 3: Install yaml dependency** + +```bash +go get gopkg.in/yaml.v3 +``` + +- [ ] **Step 4: Create `internal/config/models.go`** + +```go +package config + +import ( + "fmt" + "os" + + "gopkg.in/yaml.v3" +) + +type modelsFile struct { + Default string `yaml:"default"` + Skills map[string]string `yaml:"skills"` +} + +type Models struct { + data modelsFile +} + +func LoadModels(path string) (Models, error) { + raw, err := os.ReadFile(path) + if err != nil { + return Models{}, fmt.Errorf("load models: %w", err) + } + var f modelsFile + if err := yaml.Unmarshal(raw, &f); err != nil { + return Models{}, fmt.Errorf("parse models: %w", err) + } + return Models{data: f}, nil +} + +// Resolve returns the model for a skill, respecting three-layer priority: +// 1. override (from MCP call) — highest +// 2. per-skill default from models.yaml +// 3. global default +func (m Models) Resolve(skill, override string) string { + if override != "" { + return override + } + if model, ok := m.data.Skills[skill]; ok { + return model + } + return m.data.Default +} +``` + +- [ ] **Step 5: Run — expect PASS** + +```bash +go test ./internal/config/... +``` +Expected: PASS + +- [ ] **Step 6: Create `config/models.yaml`** + +```yaml +# Model routing table — three-layer priority: +# 1. model param in MCP tool call (caller override) +# 2. per-skill entry here +# 3. 
default (fallback) +default: ollama/qwen3-coder-30b-tuned + +skills: + tdd: ollama/qwen3-coder-30b-tuned + review: ollama/devstral-tuned + debug: ollama/deepseek-r1-tuned +``` + +- [ ] **Step 7: Commit** + +```bash +git add internal/config/models.go internal/config/models_test.go config/models.yaml +git commit -m "feat: add model routing table with three-layer priority" +``` + +--- + +## Task 5: Exec package + +**Files:** +- Create: `internal/exec/executor.go` +- Create: `internal/exec/executor_test.go` +- Modify: `internal/exec/result.go` (add Schema constant — already done in Task 2) + +- [ ] **Step 1: Write failing tests** + +Create `internal/exec/executor_test.go`: +```go +package exec_test + +import ( + "context" + "os" + "path/filepath" + "strconv" + "strings" + "testing" + "time" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// fakeClaudePath writes a shell script that prints a fixed JSON result +// and returns its path. Used to test executor without invoking real claude. 
// With exitCode == 0 the script prints output followed by a newline and
// succeeds. With any other exitCode (expected range 1-255) it prints "error"
// to stderr and exits with exactly that code; output is not printed.
func fakeClaudePath(t *testing.T, output string, exitCode int) string {
	t.Helper()
	script := filepath.Join(t.TempDir(), "claude")

	var content string
	if exitCode != 0 {
		// Propagate the requested code instead of a hard-coded 1 so callers
		// can simulate specific failures.
		content = "#!/bin/sh\necho 'error' >&2\nexit " + strconv.Itoa(exitCode) + "\n"
	} else {
		// Close the quoted word, emit an escaped quote, and reopen it, so an
		// apostrophe inside output cannot terminate the shell string.
		quoted := "'" + strings.ReplaceAll(output, "'", `'\''`) + "'"
		// printf '%s\n' prints the argument verbatim; some sh builtin echo
		// implementations interpret backslashes, which would corrupt JSON escapes.
		content = "#!/bin/sh\nprintf '%s\\n' " + quoted + "\n"
	}

	if err := os.WriteFile(script, []byte(content), 0o755); err != nil {
		t.Fatalf("write fake claude script: %v", err)
	}
	return script
}
+``` +Expected: no syntax or import errors — the only failures should be `undefined: iexec.New`, `iexec.Config` and `iexec.Request`, because the executor does not exist yet + +- [ ] **Step 3: Run — expect FAIL** + +```bash +go test ./internal/exec/... +``` +Expected: FAIL — Executor type undefined + +- [ ] **Step 4: Create `internal/exec/executor.go`** + +```go +package exec + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "os/exec" + "strings" + "time" +) + +// Config holds executor configuration. +type Config struct { + ClaudeBinary string // path to claude binary, defaults to "claude" + SystemPrompt string // contents of supervisor CLAUDE.md + Timeout time.Duration // per-invocation timeout, default 120s + LiteLLMBaseURL string // passed to Claude so it can delegate to Ollama + LiteLLMAPIKey string // passed to Claude for LiteLLM auth +} + +// Request is the input to a single supervisor invocation. +type Request struct { + SkillPrompt string // skill-specific discipline (e.g. tdd.md contents) + TaskPrompt string // the specific task (phase, project_root, spec, model) + Model string // resolved model name, passed in task prompt + Tools string // comma-separated allowed tools, default "Bash,Read,Write" +} + +// Executor spawns a claude instance and captures its structured JSON output. 
+type Executor struct { + cfg Config +} + +func New(cfg Config) *Executor { + if cfg.ClaudeBinary == "" { + cfg.ClaudeBinary = "claude" + } + if cfg.Timeout == 0 { + cfg.Timeout = 120 * time.Second + } + return &Executor{cfg: cfg} +} + +func (e *Executor) Run(ctx context.Context, req Request) (Result, error) { + ctx, cancel := context.WithTimeout(ctx, e.cfg.Timeout) + defer cancel() + + tools := req.Tools + if tools == "" { + tools = "Bash,Read,Write" + } + + // Build the full prompt: system rules + skill rules + task + infra context + litellmCtx := fmt.Sprintf( + "LITELLM_BASE_URL: %s\nLITELLM_API_KEY: %s", + e.cfg.LiteLLMBaseURL, e.cfg.LiteLLMAPIKey, + ) + prompt := strings.Join([]string{ + e.cfg.SystemPrompt, + "---", + req.SkillPrompt, + "---", + litellmCtx, + "---", + req.TaskPrompt, + }, "\n\n") + + args := []string{ + "--print", + "--bare", + "--permission-mode", "bypassPermissions", + "--tools", tools, + "--json-schema", Schema, + "--output-format", "text", + prompt, + } + + cmd := exec.CommandContext(ctx, e.cfg.ClaudeBinary, args...) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + if ctx.Err() != nil { + return Result{}, fmt.Errorf("timeout after %s", e.cfg.Timeout) + } + return Result{}, fmt.Errorf("claude exited with error: %w — stderr: %s", err, stderr.String()) + } + + var r Result + if err := json.Unmarshal(stdout.Bytes(), &r); err != nil { + return Result{}, fmt.Errorf("parse result JSON: %w — raw output: %s", err, stdout.String()) + } + if err := r.Validate(); err != nil { + return Result{}, fmt.Errorf("invalid result: %w", err) + } + return r, nil +} +``` + +- [ ] **Step 5: Run — expect PASS** + +```bash +go test ./internal/exec/... 
+``` +Expected: PASS + +- [ ] **Step 6: Commit** + +```bash +git add internal/exec/executor.go internal/exec/executor_test.go +git commit -m "feat: add executor that spawns claude and parses JSON result" +``` + +--- + +## Task 6: Registry + Skill interface + +**Files:** +- Create: `internal/registry/registry.go` +- Create: `internal/registry/registry_test.go` + +- [ ] **Step 1: Write failing tests** + +Create `internal/registry/registry_test.go`: +```go +package registry_test + +import ( + "context" + "encoding/json" + "testing" + + "github.com/mathiasbq/supervisor/internal/registry" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type stubSkill struct { + name string +} + +func (s stubSkill) Name() string { return s.name } +func (s stubSkill) Tools() []registry.ToolDef { + return []registry.ToolDef{{ + Name: s.name + "_tool", + Description: "stub tool", + InputSchema: json.RawMessage(`{"type":"object","properties":{}}`), + }} +} +func (s stubSkill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + return json.RawMessage(`{"ok":true}`), nil +} + +func TestRegistryRoutes(t *testing.T) { + r := registry.New() + r.Register(stubSkill{name: "tdd"}) + + result, err := r.Dispatch(context.Background(), "tdd_tool", json.RawMessage(`{}`)) + require.NoError(t, err) + assert.JSONEq(t, `{"ok":true}`, string(result)) +} + +func TestRegistryUnknownTool(t *testing.T) { + r := registry.New() + _, err := r.Dispatch(context.Background(), "unknown_tool", json.RawMessage(`{}`)) + assert.ErrorContains(t, err, "unknown tool") +} + +func TestRegistryListsTools(t *testing.T) { + r := registry.New() + r.Register(stubSkill{name: "tdd"}) + + tools := r.Tools() + require.Len(t, tools, 1) + assert.Equal(t, "tdd_tool", tools[0].Name) +} +``` + +- [ ] **Step 2: Run — expect FAIL** + +```bash +go test ./internal/registry/... 
// ToolDef describes a single MCP tool exposed by a skill.
type ToolDef struct {
	Name        string          `json:"name"`
	Description string          `json:"description"`
	InputSchema json.RawMessage `json:"inputSchema"`
}

// Skill is implemented by each skill package (tdd, review, etc.).
type Skill interface {
	// Name identifies the skill.
	Name() string
	// Tools lists the MCP tools the skill provides.
	Tools() []ToolDef
	// Handle executes the named tool with its raw JSON arguments.
	Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error)
}

// Registry routes MCP tool calls to the correct skill handler.
// It has no internal locking.
type Registry struct {
	skills map[string]Skill // tool name → skill
	tools  []ToolDef        // every registered tool, in registration order
}

// New returns an empty Registry ready for Register calls.
func New() *Registry {
	return &Registry{skills: make(map[string]Skill)}
}

// Register records every tool of s. A tool name registered again routes to
// the most recently registered skill, and its definition is listed again.
func (r *Registry) Register(s Skill) {
	for _, t := range s.Tools() {
		r.skills[t.Name] = s
		r.tools = append(r.tools, t)
	}
}

// Tools returns the registered tool definitions in registration order.
//
// The result is always a non-nil copy: an empty registry yields an empty
// slice, which encodes to JSON as [] rather than null (tools/list must return
// an array), and callers cannot modify the registry's own storage through it.
func (r *Registry) Tools() []ToolDef {
	out := make([]ToolDef, len(r.tools))
	copy(out, r.tools)
	return out
}

// Dispatch forwards the call to the skill that registered tool, or returns
// an "unknown tool" error when no skill did.
func (r *Registry) Dispatch(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) {
	s, ok := r.skills[tool]
	if !ok {
		return nil, fmt.Errorf("unknown tool: %s", tool)
	}
	return s.Handle(ctx, tool, args)
}
+``` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add internal/registry/ +git commit -m "feat: add skill registry with tool routing" +``` + +--- + +## Task 7: MCP server + +**Files:** +- Create: `internal/mcp/server.go` +- Create: `internal/mcp/server_test.go` + +- [ ] **Step 1: Write failing tests** + +Create `internal/mcp/server_test.go`: +```go +package mcp_test + +import ( + "bytes" + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/mathiasbq/supervisor/internal/mcp" + "github.com/mathiasbq/supervisor/internal/registry" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func jsonBody(t *testing.T, v any) *bytes.Buffer { + t.Helper() + b, err := json.Marshal(v) + require.NoError(t, err) + return bytes.NewBuffer(b) +} + +func TestMCPInitialize(t *testing.T) { + reg := registry.New() + srv := mcp.NewServer(reg) + + req := httptest.NewRequest(http.MethodPost, "/mcp", jsonBody(t, map[string]any{ + "jsonrpc": "2.0", + "id": 1, + "method": "initialize", + "params": map[string]any{}, + })) + req.Header.Set("Content-Type", "application/json") + rr := httptest.NewRecorder() + + srv.ServeHTTP(rr, req) + + assert.Equal(t, http.StatusOK, rr.Code) + var resp map[string]any + require.NoError(t, json.Unmarshal(rr.Body.Bytes(), &resp)) + result := resp["result"].(map[string]any) + assert.Equal(t, "2024-11-05", result["protocolVersion"]) +} + +func TestMCPToolsList(t *testing.T) { + reg := registry.New() + srv := mcp.NewServer(reg) + + req := httptest.NewRequest(http.MethodPost, "/mcp", jsonBody(t, map[string]any{ + "jsonrpc": "2.0", "id": 2, "method": "tools/list", "params": map[string]any{}, + })) + req.Header.Set("Content-Type", "application/json") + rr := httptest.NewRecorder() + srv.ServeHTTP(rr, req) + + assert.Equal(t, http.StatusOK, rr.Code) + var resp map[string]any + require.NoError(t, json.Unmarshal(rr.Body.Bytes(), &resp)) + result := resp["result"].(map[string]any) + 
// request is an incoming JSON-RPC 2.0 message. ID is nil when the member is
// absent or null; Params is kept raw so each method decodes its own shape.
type request struct {
	JSONRPC string          `json:"jsonrpc"`
	ID      any             `json:"id"`
	Method  string          `json:"method"`
	Params  json.RawMessage `json:"params"`
}

// response is an outgoing JSON-RPC 2.0 message carrying either Result or
// Error.
//
// ID has no omitempty on purpose: JSON-RPC 2.0 requires the id member in
// every response and mandates null when the request id could not be
// determined (for example after a parse error), so a nil ID must encode as
// "id": null rather than disappear.
type response struct {
	JSONRPC string    `json:"jsonrpc"`
	ID      any       `json:"id"`
	Result  any       `json:"result,omitempty"`
	Error   *rpcError `json:"error,omitempty"`
}

// rpcError is the JSON-RPC 2.0 error object.
type rpcError struct {
	Code    int    `json:"code"`
	Message string `json:"message"`
}
+type Server struct { + reg *registry.Registry +} + +func NewServer(reg *registry.Registry) *Server { + return &Server{reg: reg} +} + +func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) { + var req request + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeError(w, nil, -32700, "parse error") + return + } + + var result any + var rpcErr *rpcError + + switch req.Method { + case "initialize": + result = map[string]any{ + "protocolVersion": "2024-11-05", + "capabilities": map[string]any{"tools": map[string]any{}}, + "serverInfo": map[string]any{"name": "supervisor", "version": "0.1.0"}, + } + + case "tools/list": + result = map[string]any{"tools": s.reg.Tools()} + + case "tools/call": + var p struct { + Name string `json:"name"` + Arguments json.RawMessage `json:"arguments"` + } + if err := json.Unmarshal(req.Params, &p); err != nil { + rpcErr = &rpcError{Code: -32602, Message: "invalid params"} + break + } + out, err := s.reg.Dispatch(context.Background(), p.Name, p.Arguments) + if err != nil { + rpcErr = &rpcError{Code: -32000, Message: err.Error()} + break + } + result = map[string]any{ + "content": []map[string]any{{"type": "text", "text": string(out)}}, + } + + default: + rpcErr = &rpcError{Code: -32601, Message: "method not found: " + req.Method} + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(response{ + JSONRPC: "2.0", + ID: req.ID, + Result: result, + Error: rpcErr, + }) +} + +func writeError(w http.ResponseWriter, id any, code int, msg string) { + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(response{ + JSONRPC: "2.0", + ID: id, + Error: &rpcError{Code: code, Message: msg}, + }) +} +``` + +- [ ] **Step 4: Run — expect PASS** + +```bash +go test ./internal/mcp/... 
+``` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add internal/mcp/ +git commit -m "feat: add MCP HTTP server with JSON-RPC 2.0 transport" +``` + +--- + +## Task 8: TDD skill + +**Files:** +- Create: `internal/skills/tdd/skill.go` +- Create: `internal/skills/tdd/handlers.go` +- Create: `internal/skills/tdd/handlers_test.go` + +- [ ] **Step 1: Write failing tests** + +Create `internal/skills/tdd/handlers_test.go`: +```go +package tdd_test + +import ( + "context" + "encoding/json" + "testing" + + "github.com/mathiasbq/supervisor/internal/skills/tdd" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTDDSkillTools(t *testing.T) { + skill := tdd.New(tdd.Config{ + SystemPrompt: "supervisor rules", + SkillPrompt: "tdd rules", + ExecutorFn: nil, // will be set in TestHandle + }) + tools := skill.Tools() + names := make([]string, len(tools)) + for i, t := range tools { + names[i] = t.Name + } + assert.ElementsMatch(t, []string{"tdd_red", "tdd_green", "tdd_refactor"}, names) +} + +func TestTDDSkillHandleUnknown(t *testing.T) { + skill := tdd.New(tdd.Config{SystemPrompt: "s", SkillPrompt: "t"}) + _, err := skill.Handle(context.Background(), "tdd_unknown", json.RawMessage(`{}`)) + assert.ErrorContains(t, err, "unknown tool") +} + +func TestTDDRedRequiresProjectRoot(t *testing.T) { + skill := tdd.New(tdd.Config{SystemPrompt: "s", SkillPrompt: "t"}) + _, err := skill.Handle(context.Background(), "tdd_red", json.RawMessage(`{"spec":"add two numbers"}`)) + assert.ErrorContains(t, err, "project_root") +} + +func TestTDDRedRequiresSpec(t *testing.T) { + skill := tdd.New(tdd.Config{SystemPrompt: "s", SkillPrompt: "t"}) + _, err := skill.Handle(context.Background(), "tdd_red", json.RawMessage(`{"project_root":"/tmp/proj"}`)) + assert.ErrorContains(t, err, "spec") +} +``` + +- [ ] **Step 2: Run — expect FAIL** + +```bash +go test ./internal/skills/tdd/... 
+``` +Expected: FAIL — tdd package not found + +- [ ] **Step 3: Create `internal/skills/tdd/skill.go`** + +```go +package tdd + +import ( + "context" + "encoding/json" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/registry" +) + +// ExecutorFn allows injecting a test double for the executor. +type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error) + +type Config struct { + SystemPrompt string + SkillPrompt string + ExecutorFn ExecutorFn // nil = use real executor + DefaultModel string +} + +type Skill struct { + cfg Config +} + +func New(cfg Config) *Skill { + return &Skill{cfg: cfg} +} + +func (s *Skill) Name() string { return "tdd" } + +func (s *Skill) Tools() []registry.ToolDef { + schema := func(required []string, props map[string]any) json.RawMessage { + b, _ := json.Marshal(map[string]any{ + "type": "object", + "required": required, + "properties": props, + }) + return b + } + strProp := map[string]any{"type": "string"} + + return []registry.ToolDef{ + { + Name: "tdd_red", + Description: "Write a failing test for the described behavior. 
Verifies the test fails before returning.", + InputSchema: schema( + []string{"project_root", "spec"}, + map[string]any{ + "project_root": strProp, + "spec": strProp, + "model": strProp, + "test_cmd": strProp, + }, + ), + }, + { + Name: "tdd_green", + Description: "Write minimal implementation to make the test at test_path pass.", + InputSchema: schema( + []string{"project_root", "test_path"}, + map[string]any{ + "project_root": strProp, + "test_path": strProp, + "model": strProp, + "test_cmd": strProp, + }, + ), + }, + { + Name: "tdd_refactor", + Description: "Refactor the implementation at impl_path while keeping tests green.", + InputSchema: schema( + []string{"project_root", "test_path", "impl_path"}, + map[string]any{ + "project_root": strProp, + "test_path": strProp, + "impl_path": strProp, + "model": strProp, + "test_cmd": strProp, + }, + ), + }, + } +} +``` + +- [ ] **Step 4: Create `internal/skills/tdd/handlers.go`** + +```go +package tdd + +import ( + "context" + "encoding/json" + "fmt" + + iexec "github.com/mathiasbq/supervisor/internal/exec" +) + +func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + switch tool { + case "tdd_red": + return s.handleRed(ctx, args) + case "tdd_green": + return s.handleGreen(ctx, args) + case "tdd_refactor": + return s.handleRefactor(ctx, args) + default: + return nil, fmt.Errorf("unknown tool: %s", tool) + } +} + +type redArgs struct { + ProjectRoot string `json:"project_root"` + Spec string `json:"spec"` + Model string `json:"model"` + TestCmd string `json:"test_cmd"` +} + +func (s *Skill) handleRed(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) { + var args redArgs + if err := json.Unmarshal(raw, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if args.ProjectRoot == "" { + return nil, fmt.Errorf("project_root is required") + } + if args.Spec == "" { + return nil, fmt.Errorf("spec is required") + } + + task := 
fmt.Sprintf(
+		"phase: red\nproject_root: %s\nspec: %s\nmodel: %s\ntest_cmd: %s",
+		args.ProjectRoot, args.Spec, s.resolveModel(args.Model), args.TestCmd,
+	)
+	return s.execute(ctx, task)
+}
+
+// greenArgs is the decoded argument object of a tdd_green call.
+type greenArgs struct {
+	ProjectRoot string `json:"project_root"`
+	TestPath    string `json:"test_path"`
+	Model       string `json:"model"`
+	TestCmd     string `json:"test_cmd"`
+}
+
+// handleGreen decodes and validates tdd_green arguments (project_root and
+// test_path must be non-empty), then runs a "phase: green" task through the
+// executor.
+func (s *Skill) handleGreen(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) {
+	var args greenArgs
+	if err := json.Unmarshal(raw, &args); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	if args.ProjectRoot == "" {
+		return nil, fmt.Errorf("project_root is required")
+	}
+	if args.TestPath == "" {
+		return nil, fmt.Errorf("test_path is required")
+	}
+
+	// The task prompt is a newline-separated list of "key: value" fields.
+	task := fmt.Sprintf(
+		"phase: green\nproject_root: %s\ntest_path: %s\nmodel: %s\ntest_cmd: %s",
+		args.ProjectRoot, args.TestPath, s.resolveModel(args.Model), args.TestCmd,
+	)
+	return s.execute(ctx, task)
+}
+
+// refactorArgs is the decoded argument object of a tdd_refactor call.
+type refactorArgs struct {
+	ProjectRoot string `json:"project_root"`
+	TestPath    string `json:"test_path"`
+	ImplPath    string `json:"impl_path"`
+	Model       string `json:"model"`
+	TestCmd     string `json:"test_cmd"`
+}
+
+// handleRefactor decodes and validates tdd_refactor arguments (project_root,
+// test_path, and impl_path must be non-empty), then runs a "phase: refactor"
+// task through the executor.
+func (s *Skill) handleRefactor(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) {
+	var args refactorArgs
+	if err := json.Unmarshal(raw, &args); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	if args.ProjectRoot == "" {
+		return nil, fmt.Errorf("project_root is required")
+	}
+	if args.TestPath == "" {
+		return nil, fmt.Errorf("test_path is required")
+	}
+	if args.ImplPath == "" {
+		return nil, fmt.Errorf("impl_path is required")
+	}
+
+	// The task prompt is a newline-separated list of "key: value" fields.
+	task := fmt.Sprintf(
+		"phase: refactor\nproject_root: %s\ntest_path: %s\nimpl_path: %s\nmodel: %s\ntest_cmd: %s",
+		args.ProjectRoot, args.TestPath, args.ImplPath, s.resolveModel(args.Model), args.TestCmd,
+	)
+	return s.execute(ctx, task)
+}
+
+// resolveModel returns override when it is non-empty, otherwise the
+// configured DefaultModel.
+func (s *Skill) resolveModel(override string) string {
+	if override != "" {
+ return override + } + return s.cfg.DefaultModel +} + +func (s *Skill) execute(ctx context.Context, task string) (json.RawMessage, error) { + req := iexec.Request{ + SkillPrompt: s.cfg.SkillPrompt, + TaskPrompt: task, + } + + var result iexec.Result + var err error + + if s.cfg.ExecutorFn != nil { + result, err = s.cfg.ExecutorFn(ctx, req) + } else { + return nil, fmt.Errorf("no executor configured") + } + + if err != nil { + return nil, err + } + + return json.Marshal(result) +} +``` + +- [ ] **Step 5: Run — expect PASS** + +```bash +go test ./internal/skills/tdd/... +``` +Expected: PASS + +- [ ] **Step 6: Commit** + +```bash +git add internal/skills/tdd/ +git commit -m "feat: add TDD skill with red/green/refactor handlers" +``` + +--- + +## Task 9: Wire main.go + +**Files:** +- Modify: `cmd/supervisor/main.go` + +- [ ] **Step 1: Write failing integration test** + +Create `cmd/supervisor/integration_test.go`: +```go +//go:build integration + +package main_test + +import ( + "bytes" + "encoding/json" + "net/http" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestServerStartsAndResponds(t *testing.T) { + // Start the server in a goroutine (assumes it's running on :3200) + // This test is run manually after `task supervisor:dev` + time.Sleep(500 * time.Millisecond) + + body, _ := json.Marshal(map[string]any{ + "jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": map[string]any{}, + }) + resp, err := http.Post("http://localhost:3200/mcp", "application/json", bytes.NewBuffer(body)) + require.NoError(t, err) + defer resp.Body.Close() + + assert.Equal(t, http.StatusOK, resp.StatusCode) +} +``` + +- [ ] **Step 2: Run unit tests — all still pass** + +```bash +go test ./... 
+``` +Expected: PASS (integration test skipped without build tag) + +- [ ] **Step 3: Rewrite `cmd/supervisor/main.go`** + +```go +package main + +import ( + "log/slog" + "net/http" + "os" + + "github.com/mathiasbq/supervisor/internal/config" + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/mcp" + "github.com/mathiasbq/supervisor/internal/registry" + "github.com/mathiasbq/supervisor/internal/skills/tdd" +) + +func main() { + logger := slog.New(slog.NewJSONHandler(os.Stdout, nil)) + + cfg, err := config.Load() + if err != nil { + logger.Error("load config", "err", err) + os.Exit(1) + } + + models, err := config.LoadModels(cfg.ModelsFile) + if err != nil { + logger.Error("load models", "err", err) + os.Exit(1) + } + + systemPrompt, err := os.ReadFile(cfg.ConfigDir + "/CLAUDE.md") + if err != nil { + logger.Error("read supervisor CLAUDE.md", "err", err) + os.Exit(1) + } + + tddPrompt, err := os.ReadFile(cfg.ConfigDir + "/tdd.md") + if err != nil { + logger.Error("read tdd.md", "err", err) + os.Exit(1) + } + + executor := iexec.New(iexec.Config{ + SystemPrompt: string(systemPrompt), + LiteLLMBaseURL: cfg.LiteLLMBaseURL, + LiteLLMAPIKey: cfg.LiteLLMAPIKey, + }) + + reg := registry.New() + reg.Register(tdd.New(tdd.Config{ + SystemPrompt: string(systemPrompt), + SkillPrompt: string(tddPrompt), + DefaultModel: models.Resolve("tdd", ""), + ExecutorFn: executor.Run, + })) + + srv := mcp.NewServer(reg) + mux := http.NewServeMux() + mux.Handle("/mcp", srv) + + addr := ":" + cfg.Port + logger.Info("supervisor starting", "addr", addr) + if err := http.ListenAndServe(addr, mux); err != nil { + logger.Error("server error", "err", err) + os.Exit(1) + } +} +``` + +- [ ] **Step 4: Run all tests — expect PASS** + +```bash +go test ./... 
+``` +Expected: PASS + +- [ ] **Step 5: Verify it builds** + +```bash +go build ./cmd/supervisor/ +``` +Expected: binary produced, no errors + +- [ ] **Step 6: Commit** + +```bash +git add cmd/supervisor/main.go cmd/supervisor/integration_test.go +git commit -m "feat: wire all components in main.go" +``` + +--- + +## Task 10: Config files + +**Files:** +- Create: `config/supervisor/CLAUDE.md` +- Create: `config/supervisor/tdd.md` +- Create: `.env.example` + +- [ ] **Step 1: Create `config/supervisor/CLAUDE.md`** + +```markdown +# Supervisor Agent + +You are a supervisor agent. You orchestrate skill workers. +You do not converse. You do not explain. You execute and report. + +## Output contract — NON-NEGOTIABLE + +Every response must be raw JSON conforming to the provided schema. +No preamble, no markdown, no prose. Raw JSON only. + +If you cannot produce valid JSON for any reason, output: +{"status":"error","phase":"","skill":"","file_path":"","runner_output":"","verified":false,"model_used":"self","message":""} + +## Verification — IRON LAW + +Never trust your own output. Always run the test suite. +Use the subprocess exit code as the only source of truth. +`verified: true` only when exit code matches the expected outcome for the phase. +`verified: false` even if you believe the code is correct. + +## Loop prevention + +- Maximum 3 attempts per phase. +- If output is identical across two consecutive attempts, stop immediately. +- After 3 failures set status "error" and explain the blocker in message. +- Never retry indefinitely. + +## Test runner detection + +Inspect project_root for these signals in order: + +| Signal | Command | +|---------------------------------|----------------------| +| go.mod | go test ./... | +| package.json | npm test | +| pyproject.toml / pytest.ini | pytest | +| Cargo.toml | cargo test | +| Gemfile | bundle exec rspec | +| mix.exs | mix test | + +If test_cmd is provided in the task, use it directly — skip detection. 
+If runner cannot be determined, return status "error". + +## Model delegation + +When a model is specified in the task, you may use it for code generation +by calling LiteLLM at the LITELLM_BASE_URL with the specified model name. +Always retain verification yourself — never delegate test execution. +Tag model_used in your response with the model that generated the code, +or "self" if you generated it directly. +``` + +- [ ] **Step 2: Create `config/supervisor/tdd.md`** + +```markdown +# TDD Skill + +## Iron Law + +NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST. + +## Red phase + +- Write exactly one test. One behavior. Name must describe the behavior clearly. +- Run the test suite. Confirm the test FAILS. +- If the test passes immediately: it tests existing behavior or is vacuous. + Return status "fail" with message explaining why the test is wrong. +- Do not write any implementation code in this phase. + +## Green phase + +- Write the minimal code to make the failing test pass. Nothing more. +- YAGNI: no extra parameters, no future-proofing, no clever abstractions. +- Run the test suite. Confirm it PASSES. +- If tests fail: fix the implementation, not the test. Max 3 attempts. + +## Refactor phase + +- Improve structure, naming, or clarity only. No new behavior. +- Tests must remain green after every change. +- If tests break during refactor: revert that change, return status "fail". 
+``` + +- [ ] **Step 3: Create `.env.example`** + +```bash +# Supervisor MCP server +SUPERVISOR_PORT=3200 +SUPERVISOR_CONFIG_DIR=./config/supervisor +SUPERVISOR_MODELS_FILE=./config/models.yaml + +# LiteLLM gateway (iguana) +LITELLM_BASE_URL=http://iguana:4000 +LITELLM_API_KEY=your-litellm-master-key +``` + +- [ ] **Step 4: Run all tests — still pass** + +```bash +task check +``` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add config/ .env.example +git commit -m "feat: add supervisor CLAUDE.md, tdd skill prompt, and env example" +``` + +--- + +## Task 11: Taskfile + MCP registration + +**Files:** +- Modify: `Taskfile.yml` +- Modify: `.context/mcp.json` + +- [ ] **Step 1: Add tasks to `Taskfile.yml`** + +Add these tasks to the existing `Taskfile.yml`: +```yaml + supervisor:dev: + desc: Run supervisor MCP server (development) + cmds: + - go run ./cmd/supervisor + + supervisor:build: + desc: Build supervisor binary + cmds: + - go build -o bin/supervisor ./cmd/supervisor + + supervisor:test:smoke: + desc: Smoke test supervisor via MCP (requires supervisor:dev running) + cmds: + - | + curl -s -X POST http://localhost:${SUPERVISOR_PORT:-3200}/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/list","params":{}}' | jq . 
+``` + +- [ ] **Step 2: Register supervisor in `.context/mcp.json`** + +Replace contents of `.context/mcp.json`: +```json +{ + "mcpServers": { + "knowledge": { + "url": "http://localhost:3100/mcp", + "description": "Project knowledge base — vector + graph retrieval" + }, + "supervisor": { + "url": "http://localhost:3200/mcp", + "description": "Skill workers — TDD (red/green/refactor), more coming" + } + } +} +``` + +- [ ] **Step 3: Sync context files** + +```bash +task context:sync +``` +Expected: CLAUDE.md, AGENTS.md updated with new MCP server entry + +- [ ] **Step 4: Commit** + +```bash +git add Taskfile.yml .context/mcp.json CLAUDE.md AGENTS.md +git commit -m "feat: register supervisor MCP server and add task targets" +``` + +--- + +## Task 12: End-to-end smoke test + +Manual verification that all layers connect. + +- [ ] **Step 1: Start the supervisor** + +In a terminal: +```bash +cp .env.example .env # fill in LITELLM_API_KEY if needed +task supervisor:dev +``` +Expected: `{"time":"...","level":"INFO","msg":"supervisor starting","addr":":3200"}` + +- [ ] **Step 2: Verify tools/list** + +```bash +task supervisor:test:smoke +``` +Expected: JSON response with `tdd_red`, `tdd_green`, `tdd_refactor` in `result.tools` + +- [ ] **Step 3: Verify tdd_red responds** + +```bash +curl -s -X POST http://localhost:3200/mcp \ + -H "Content-Type: application/json" \ + -d '{ + "jsonrpc":"2.0","id":1,"method":"tools/call", + "params":{ + "name":"tdd_red", + "arguments":{ + "project_root":"'"$PWD"'", + "spec":"a function that returns the sum of two integers" + } + } + }' | jq . +``` +Expected: JSON-RPC response with `result.content[0].text` containing a valid supervisor Result JSON + +- [ ] **Step 4: Reload Claude Code MCP config** + +In Claude Code, run `/mcp` to verify the supervisor server appears and its tools are listed. 
+
+- [ ] **Step 5: Final commit**
+
+```bash
+git add -p # stage any notes or fixes from smoke test
+git commit -m "chore: smoke test complete, supervisor MCP server operational"
+```
+
+---
+
+## go.sum
+
+After all `go get` commands in the tasks above, run `go mod tidy` and commit the result. `go mod tidy` can rewrite `go.mod` as well as `go.sum`, so stage both files:
+```bash
+go mod tidy
+git add go.mod go.sum
+git commit -m "chore: tidy go.mod and go.sum"
+```