feat(debug): add debug MCP skill with hypothesis generation

Implements the debug skill following the same pattern as review. The skill accepts project_root + error (+ optional context/model/session_id), prepends session history, and calls the executor to produce 3-5 ordered hypotheses — diagnosis only, no fixes. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-19 11:29:58 +02:00
parent 8fb44affef
commit 8cff57009a
5 changed files with 240 additions and 0 deletions
--- a/cmd/supervisor/main.go
+++ b/cmd/supervisor/main.go
@@ -13,6 +13,7 @@ import (
 	"github.com/mathiasbq/supervisor/internal/skills/brain"
 	"github.com/mathiasbq/supervisor/internal/skills/org"
 	"github.com/mathiasbq/supervisor/internal/skills/retrospective"
 	skilldebug "github.com/mathiasbq/supervisor/internal/skills/debug"
 	"github.com/mathiasbq/supervisor/internal/skills/review"
 	"github.com/mathiasbq/supervisor/internal/skills/sessionlog"
 	"github.com/mathiasbq/supervisor/internal/skills/tdd"
@@ -58,6 +59,12 @@ func main() {
 		os.Exit(1)
 	}
 	debugPrompt, err := os.ReadFile(cfg.ConfigDir + "/debug.md")
 	if err != nil {
 		logger.Error("read debug.md", "path", cfg.ConfigDir+"/debug.md", "err", err)
 		os.Exit(1)
 	}
 	executor := iexec.New(iexec.Config{
 		SystemPrompt:   string(systemPrompt),
 		LiteLLMBaseURL: cfg.LiteLLMBaseURL,
@@ -97,6 +104,12 @@ func main() {
 		ExecutorFn:   executor.Run,
 		SessionsDir:  cfg.SessionsDir,
 	}))
 	reg.Register(skilldebug.New(skilldebug.Config{
 		SkillPrompt:  string(debugPrompt),
 		DefaultModel: models.Resolve("debug", ""),
 		ExecutorFn:   executor.Run,
 		SessionsDir:  cfg.SessionsDir,
 	}))
 	srv := mcp.NewServer(reg)
 	mux := http.NewServeMux()
--- a/config/supervisor/debug.md
+++ b/config/supervisor/debug.md
@@ -0,0 +1,31 @@
 # Debug Discipline
 You are a systematic debugger. Form hypotheses before suggesting fixes.
 ## Iron laws
 1. Never suggest "try X and see what happens" — every hypothesis must have a specific expected outcome if correct
 2. Generate between 3 and 5 hypotheses, ordered by likelihood (most likely first)
 3. Never fix the bug — diagnose only; the caller decides what to do with the hypotheses
 ## Output contract
 Return JSON result with:
 - `status`: "pass" (hypotheses generated) or "error" (error too ambiguous to analyse)
 - `phase`: "debug"
 - `skill`: "debug"
 - `file_path`: the file most relevant to the error (read it before forming hypotheses)
 - `runner_output`: your hypotheses, formatted as:
  ```
  HYPOTHESIS 1 (likelihood: high): <mechanism>
  VERIFY: <exact command or file to check> → expected if correct: <specific output>
  HYPOTHESIS 2 (likelihood: medium): <mechanism>
  VERIFY: <exact command or file to check> → expected if correct: <specific output>
  ```
 - `verified`: false — verification is the caller's job
 - `message`: "N hypotheses for: <one-line error summary>"
 ## Rules
 1. Read the error and any context files provided before forming hypotheses
 2. Identify the failure mode first — what actually went wrong, not just what the error says
 3. For each hypothesis: name the mechanism, explain why it would produce this exact error, give a concrete verification command with expected output
 4. If the error is clearly a typo or trivial mistake, still form 3 hypotheses — surface the most likely cause as #1
--- a/internal/skills/debug/handlers.go
+++ b/internal/skills/debug/handlers.go
@@ -0,0 +1,80 @@
 // internal/skills/debug/handlers.go
 package debug
 import (
 	"context"
 	"encoding/json"
 	"fmt"
 	iexec "github.com/mathiasbq/supervisor/internal/exec"
 	"github.com/mathiasbq/supervisor/internal/session"
 )
 // debugArgs is the JSON argument payload accepted by the "debug" tool.
 // Handle rejects the call when ProjectRoot or Error is empty; the remaining
 // fields are optional.
 type debugArgs struct {
 	// ProjectRoot is forwarded in the task prompt as "project_root". Required.
 	ProjectRoot string `json:"project_root"`
 	// Error is the error text to analyse, forwarded as "error". Required.
 	Error       string `json:"error"`
 	// Context is optional free-form text forwarded as "context".
 	Context     string `json:"context"`
 	// Model overrides the skill's configured DefaultModel when non-empty.
 	Model       string `json:"model"`
 	// SessionID selects the session whose history is prepended to the task;
 	// when empty no history is loaded.
 	SessionID   string `json:"session_id"`
 }
 // Handle runs the "debug" MCP tool; any other tool name is rejected.
 //
 // args is decoded into debugArgs; project_root and error must be non-empty.
 // The model defaults to the configured DefaultModel when the caller does not
 // supply one. The task prompt is built from the arguments, optionally
 // prefixed with the session history for session_id, and handed to the
 // configured executor together with the skill prompt. The executor's result
 // is returned JSON-encoded.
 //
 // Errors: unknown tool, malformed args, missing required fields, missing
 // executor, any error returned by the executor (returned unwrapped), or a
 // failure to encode the result.
 func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) {
 	if tool != "debug" {
 		return nil, fmt.Errorf("unknown tool: %s", tool)
 	}
 	var a debugArgs
 	if err := json.Unmarshal(args, &a); err != nil {
 		return nil, fmt.Errorf("parse args: %w", err)
 	}
 	if a.ProjectRoot == "" {
 		return nil, fmt.Errorf("project_root is required")
 	}
 	if a.Error == "" {
 		return nil, fmt.Errorf("error is required")
 	}
 	// Fail fast on a missing executor before reading session history, so a
 	// misconfigured skill does not touch the session store for nothing.
 	// Argument validation still comes first so callers see those errors.
 	if s.cfg.ExecutorFn == nil {
 		return nil, fmt.Errorf("no executor configured")
 	}
 	model := a.Model
 	if model == "" {
 		model = s.cfg.DefaultModel
 	}
 	task := fmt.Sprintf(
 		"phase: debug\nproject_root: %s\nerror: %s\ncontext: %s\nmodel: %s",
 		a.ProjectRoot, a.Error, a.Context, model,
 	)
 	task = s.prependHistory(a.SessionID, "debug", task)
 	result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
 		SkillPrompt: s.cfg.SkillPrompt,
 		TaskPrompt:  task,
 		Model:       model,
 		Tools:       "Read,Bash",
 	})
 	if err != nil {
 		return nil, err
 	}
 	b, err := json.Marshal(result)
 	if err != nil {
 		return nil, fmt.Errorf("marshal result: %w", err)
 	}
 	return b, nil
 }
 // prependHistory returns task prefixed with the formatted history of the
 // given session, separated by a "---" rule. The task is returned unchanged
 // when no session ID or sessions directory is set, when the session cannot
 // be read or has no entries, or when the formatted history is empty.
 func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
 	if sessionID != "" && s.cfg.SessionsDir != "" {
 		// History is best-effort: a read failure falls through to the bare task.
 		if log, readErr := session.Read(s.cfg.SessionsDir, sessionID); readErr == nil && len(log) > 0 {
 			if prefix := session.FormatHistory(log, currentPhase); prefix != "" {
 				return prefix + "\n---\n\n" + task
 			}
 		}
 	}
 	return task
 }
--- a/internal/skills/debug/handlers_test.go
+++ b/internal/skills/debug/handlers_test.go
@@ -0,0 +1,61 @@
 // internal/skills/debug/handlers_test.go
 package debug_test
 import (
 	"context"
 	"encoding/json"
 	"testing"
 	iexec "github.com/mathiasbq/supervisor/internal/exec"
 	"github.com/mathiasbq/supervisor/internal/skills/debug"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 // TestDebugToolRegistered checks that the skill advertises a tool named "debug".
 func TestDebugToolRegistered(t *testing.T) {
 	defs := debug.New(debug.Config{SkillPrompt: "debug rules"}).Tools()
 	toolNames := make([]string, 0)
 	for i := range defs {
 		toolNames = append(toolNames, defs[i].Name)
 	}
 	assert.Contains(t, toolNames, "debug")
 }
 // TestDebugRequiresProjectRoot checks that a call without project_root is
 // rejected with an error naming the missing field.
 func TestDebugRequiresProjectRoot(t *testing.T) {
 	payload := json.RawMessage(`{"error":"panic: nil pointer"}`)
 	skill := debug.New(debug.Config{SkillPrompt: "d"})
 	_, handleErr := skill.Handle(context.Background(), "debug", payload)
 	assert.ErrorContains(t, handleErr, "project_root")
 }
 // TestDebugRequiresError checks that a call without the error argument is
 // rejected. The full message is asserted because the bare substring "error"
 // can also appear in unrelated failures such as JSON parse errors, and would
 // let them pass.
 func TestDebugRequiresError(t *testing.T) {
 	sk := debug.New(debug.Config{SkillPrompt: "d"})
 	_, err := sk.Handle(context.Background(), "debug", json.RawMessage(`{"project_root":"/tmp"}`))
 	assert.ErrorContains(t, err, "error is required")
 }
 // TestDebugCallsExecutor checks that a valid call reaches the executor, that
 // the error text and context appear in the task prompt, and that the
 // executor's result comes back JSON-encoded.
 func TestDebugCallsExecutor(t *testing.T) {
 	var (
 		invoked    bool
 		taskPrompt string
 	)
 	canned := iexec.Result{
 		Status: "pass", Phase: "debug", Skill: "debug",
 		RunnerOutput: "HYPOTHESIS 1 (likelihood: high): nil map access\nVERIFY: go test ./... → expected: panic line reference",
 		Verified:     false, ModelUsed: "self", Message: "3 hypotheses for: panic nil pointer at foo.go:42",
 	}
 	// stub records the prompt it was given and returns the canned result.
 	stub := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
 		invoked = true
 		taskPrompt = req.TaskPrompt
 		return canned, nil
 	}
 	skill := debug.New(debug.Config{SkillPrompt: "debug rules", ExecutorFn: stub, SessionsDir: t.TempDir()})
 	payload := json.RawMessage(
 		`{"project_root":"/tmp/proj","error":"panic: nil pointer dereference at foo.go:42","context":"occurs on startup"}`,
 	)
 	raw, err := skill.Handle(context.Background(), "debug", payload)
 	require.NoError(t, err)
 	assert.True(t, invoked)
 	assert.Contains(t, taskPrompt, "panic: nil pointer dereference")
 	assert.Contains(t, taskPrompt, "occurs on startup")
 	var decoded iexec.Result
 	require.NoError(t, json.Unmarshal(raw, &decoded))
 	assert.Equal(t, "debug", decoded.Phase)
 }
--- a/internal/skills/debug/skill.go
+++ b/internal/skills/debug/skill.go
@@ -0,0 +1,55 @@
 // internal/skills/debug/skill.go
 package debug
 import (
 	"context"
 	"encoding/json"
 	iexec "github.com/mathiasbq/supervisor/internal/exec"
 	"github.com/mathiasbq/supervisor/internal/registry"
 )
 // ExecutorFn is the signature of the function the skill calls to execute a
 // request: it receives an iexec.Request and returns an iexec.Result or an
 // error.
 // NOTE(review): originally described as running a worker subprocess; the
 // executor implementation is not visible here, so confirm against iexec.
 type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
 // Config holds dependencies for the debug skill.
 type Config struct {
 	// SkillPrompt is sent to the executor as the request's skill prompt.
 	SkillPrompt  string
 	// DefaultModel is used when the tool call does not specify a model.
 	DefaultModel string
 	// ExecutorFn executes the assembled request. When nil, Handle fails with
 	// "no executor configured".
 	ExecutorFn   ExecutorFn
 	// SessionsDir is the directory session history is read from. When empty,
 	// no history is prepended to the task.
 	SessionsDir  string
 }
 // Skill implements the debug MCP tool. It holds only the Config it was
 // created with; construct it with New.
 type Skill struct{ cfg Config }
 // New returns a debug Skill backed by the supplied configuration.
 func New(cfg Config) *Skill {
 	sk := new(Skill)
 	sk.cfg = cfg
 	return sk
 }
 // Name reports the identifier of this skill, which is always "debug".
 func (s *Skill) Name() string {
 	const skillName = "debug"
 	return skillName
 }
 // Tools lists the MCP tool definitions exposed by this skill: a single
 // "debug" tool whose input schema is a JSON object with string properties
 // project_root, error, context, model and session_id, of which project_root
 // and error are required.
 func (s *Skill) Tools() []registry.ToolDef {
 	stringProp := map[string]any{"type": "string"}
 	// The schema is built only from string-keyed maps, strings and a string
 	// slice, which encoding/json can always encode, so the error is discarded.
 	encoded, _ := json.Marshal(map[string]any{
 		"type":     "object",
 		"required": []string{"project_root", "error"},
 		"properties": map[string]any{
 			"project_root": stringProp,
 			"error":        stringProp,
 			"context":      stringProp,
 			"model":        stringProp,
 			"session_id":   stringProp,
 		},
 	})
 	debugTool := registry.ToolDef{
 		Name:        "debug",
 		Description: "Analyse an error and return 3-5 hypotheses ordered by likelihood, each with a concrete verification step.",
 		InputSchema: json.RawMessage(encoded),
 	}
 	return []registry.ToolDef{debugTool}
 }