refactor: replace orchestrator/verifier chain with direct LiteLLM calls

Drop the three-layer Claude subprocess orchestration (local model → Claude verifier → cloud escalation). Skills now call LiteLLM directly and return plain text to Claude Code, which decides what to do with it.

- Delete executor, orchestrator, verifier, result, attempts packages
- Simplify LiteLLMExecutor: Run(Request)→Result becomes Complete(model,sys,user)→(string,int64,error)
- Replace ExecutorFn with CompleteFunc in all 6 skill configs
- Rewrite all skill handlers to call Complete and return {"text","model","duration_ms"}
- Simplify config/models: remove Verifier/LlamaSwapURL, add ModelFor
- Bump version to v0.5.0

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-22 16:19:09 +02:00
parent 823de23213
commit ce45592730
34 changed files with 266 additions and 1432 deletions
--- a/internal/exec/executor.go
+++ b/internal/exec/executor.go
@@ -1,111 +0,0 @@
-package exec
-
-import (
-	"bytes"
-	"context"
-	"encoding/json"
-	"fmt"
-	"os"
-	"os/exec"
-	"strings"
-	"time"
-)
-
-// Config holds executor configuration.
-type Config struct {
-	ClaudeBinary   string        // path to claude binary, defaults to "claude"
-	SystemPrompt   string        // contents of supervisor CLAUDE.md
-	Timeout        time.Duration // per-invocation timeout, default 120s
-	LiteLLMBaseURL string        // passed to Claude so it can delegate to Ollama
-	LiteLLMAPIKey  string        // passed to Claude for LiteLLM auth
-}
-
-// Request is the input to a single supervisor invocation.
-type Request struct {
-	SkillPrompt string // skill-specific discipline (e.g. tdd.md contents)
-	TaskPrompt  string // the specific task (phase, project_root, spec, model)
-	Model       string // resolved model name, passed in task prompt
-	Tools       string // comma-separated allowed tools, default "Bash,Read,Write"
-}
-
-// Executor spawns a claude instance and captures its structured JSON output.
-type Executor struct {
-	cfg Config
-}
-
-func New(cfg Config) *Executor {
-	if cfg.ClaudeBinary == "" {
-		cfg.ClaudeBinary = "claude"
-	}
-	if cfg.Timeout == 0 {
-		cfg.Timeout = 120 * time.Second
-	}
-	return &Executor{cfg: cfg}
-}
-
-func (e *Executor) Run(ctx context.Context, req Request) (Result, error) {
-	ctx, cancel := context.WithTimeout(ctx, e.cfg.Timeout)
-	defer cancel()
-
-	tools := req.Tools
-	if tools == "" {
-		tools = "Bash,Read,Write"
-	}
-
-	// Build the full prompt: system rules + skill rules + infra context + task.
-	// LITELLM_API_KEY is injected as a subprocess env var, not in the prompt,
-	// to prevent it appearing in error log output.
-	litellmCtx := fmt.Sprintf("LITELLM_BASE_URL: %s", e.cfg.LiteLLMBaseURL)
-	prompt := strings.Join([]string{
-		e.cfg.SystemPrompt,
-		"---",
-		req.SkillPrompt,
-		"---",
-		litellmCtx,
-		"---",
-		req.TaskPrompt,
-	}, "\n\n")
-
-	args := []string{
-		"--print",
-		"--permission-mode", "bypassPermissions",
-		"--tools", tools,
-		"--json-schema", Schema,
-		"--output-format", "json",
-	}
-	if strings.HasPrefix(req.Model, "claude-") {
-		args = append(args, "--model", req.Model)
-	}
-	args = append(args, prompt)
-
-	cmd := exec.CommandContext(ctx, e.cfg.ClaudeBinary, args...)
-	cmd.Env = append(os.Environ(), "LITELLM_API_KEY="+e.cfg.LiteLLMAPIKey)
-	var stdout, stderr bytes.Buffer
-	cmd.Stdout = &stdout
-	cmd.Stderr = &stderr
-
-	if err := cmd.Run(); err != nil {
-		if ctx.Err() != nil {
-			return Result{}, fmt.Errorf("timeout after %s", e.cfg.Timeout)
-		}
-		return Result{}, fmt.Errorf("claude exited with error: %w — stderr: %s", err, stderr.String())
-	}
-
-	// --output-format json wraps the response in an envelope; structured output
-	// from --json-schema is in the "structured_output" field.
-	var envelope struct {
-		StructuredOutput *Result `json:"structured_output"`
-		IsError          bool    `json:"is_error"`
-		Result           string  `json:"result"` // fallback text result for error messages
-	}
-	if err := json.Unmarshal(stdout.Bytes(), &envelope); err != nil {
-		return Result{}, fmt.Errorf("parse envelope JSON: %w — raw: %s — stderr: %s", err, stdout.String(), stderr.String())
-	}
-	if envelope.StructuredOutput == nil {
-		return Result{}, fmt.Errorf("no structured_output in response — result: %s — stderr: %s", envelope.Result, stderr.String())
-	}
-	if err := envelope.StructuredOutput.Validate(); err != nil {
-		return Result{}, fmt.Errorf("invalid result: %w", err)
-	}
-	return *envelope.StructuredOutput, nil
-}
--- a/internal/exec/executor_test.go
+++ b/internal/exec/executor_test.go
@@ -1,132 +0,0 @@
-package exec_test
-
-import (
-	"context"
-	"os"
-	"path/filepath"
-	"testing"
-	"time"
-
-	iexec "github.com/mathiasbq/supervisor/internal/exec"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-)
-
-// fakeClaudePath writes a shell script that prints fixed output and returns its path.
-func fakeClaudePath(t *testing.T, output string, exitCode int) string {
-	t.Helper()
-	dir := t.TempDir()
-	script := filepath.Join(dir, "claude")
-	var content string
-	if exitCode != 0 {
-		content = "#!/bin/sh\necho 'error' >&2\nexit 1\n"
-	} else {
-		content = "#!/bin/sh\necho '" + output + "'\n"
-	}
-	require.NoError(t, os.WriteFile(script, []byte(content), 0755))
-	return script
-}
-
-func TestExecutorParsesValidResult(t *testing.T) {
-	// Fake claude emits the --output-format json envelope that the real CLI produces.
-	// The executor extracts the result from the "structured_output" field.
-	envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"red","skill":"tdd","file_path":"/tmp/x_test.go","runner_output":"FAIL","verified":true,"model_used":"self","message":"ok"}}`
-	claude := fakeClaudePath(t, envelope, 0)
-
-	ex := iexec.New(iexec.Config{
-		ClaudeBinary: claude,
-		SystemPrompt: "you are a supervisor",
-		Timeout:      5 * time.Second,
-	})
-
-	result, err := ex.Run(context.Background(), iexec.Request{
-		SkillPrompt: "tdd rules",
-		TaskPrompt:  "run red phase",
-	})
-	require.NoError(t, err)
-	assert.Equal(t, "pass", result.Status)
-	assert.True(t, result.Verified)
-}
-
-func TestExecutorReturnsErrorOnNonZeroExit(t *testing.T) {
-	claude := fakeClaudePath(t, "", 1)
-
-	ex := iexec.New(iexec.Config{
-		ClaudeBinary: claude,
-		SystemPrompt: "you are a supervisor",
-		Timeout:      5 * time.Second,
-	})
-
-	_, err := ex.Run(context.Background(), iexec.Request{TaskPrompt: "fail"})
-	assert.Error(t, err)
-}
-
-func TestExecutorTimesOut(t *testing.T) {
-	dir := t.TempDir()
-	script := filepath.Join(dir, "claude")
-	require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\nsleep 60\n"), 0755))
-
-	ex := iexec.New(iexec.Config{
-		ClaudeBinary: script,
-		SystemPrompt: "you are a supervisor",
-		Timeout:      100 * time.Millisecond,
-	})
-
-	_, err := ex.Run(context.Background(), iexec.Request{TaskPrompt: "slow"})
-	assert.ErrorContains(t, err, "timeout")
-}
-
-func TestExecutorPassesModelFlagForCloudModel(t *testing.T) {
-	// The script captures its args to a temp file so we can assert --model was passed.
-	argsFile := filepath.Join(t.TempDir(), "args.txt")
-	envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"review","skill":"review","file_path":"","runner_output":"","verified":true,"model_used":"claude-sonnet-4-6","message":"ok"}}`
-
-	dir := t.TempDir()
-	script := filepath.Join(dir, "claude")
-	content := "#!/bin/sh\necho \"$@\" > " + argsFile + "\necho '" + envelope + "'\n"
-	require.NoError(t, os.WriteFile(script, []byte(content), 0755))
-
-	ex := iexec.New(iexec.Config{
-		ClaudeBinary: script,
-		SystemPrompt: "sys",
-		Timeout:      5 * time.Second,
-	})
-
-	_, err := ex.Run(context.Background(), iexec.Request{
-		SkillPrompt: "review rules",
-		TaskPrompt:  "do review",
-		Model:       "claude-sonnet-4-6",
-	})
-	require.NoError(t, err)
-
-	argsData, err := os.ReadFile(argsFile)
-	require.NoError(t, err)
-	assert.Contains(t, string(argsData), "--model claude-sonnet-4-6")
-}
-
-func TestExecutorSkipsModelFlagForLocalModel(t *testing.T) {
-	argsFile := filepath.Join(t.TempDir(), "args.txt")
-	envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"review","skill":"review","file_path":"","runner_output":"","verified":true,"model_used":"ollama/devstral","message":"ok"}}`
-
-	dir := t.TempDir()
-	script := filepath.Join(dir, "claude")
-	content := "#!/bin/sh\necho \"$@\" > " + argsFile + "\necho '" + envelope + "'\n"
-	require.NoError(t, os.WriteFile(script, []byte(content), 0755))
-
-	ex := iexec.New(iexec.Config{
-		ClaudeBinary: script,
-		SystemPrompt: "sys",
-		Timeout:      5 * time.Second,
-	})
-
-	_, err := ex.Run(context.Background(), iexec.Request{
-		SkillPrompt: "review rules",
-		TaskPrompt:  "do review",
-		Model:       "ollama/devstral",
-	})
-	require.NoError(t, err)
-
-	argsData, err := os.ReadFile(argsFile)
-	require.NoError(t, err)
-	assert.NotContains(t, string(argsData), "--model")
-}
--- a/internal/exec/litellm.go
+++ b/internal/exec/litellm.go
@@ -9,9 +9,8 @@ import (
 	"time"
 )

-// LiteLLMExecutor calls a LiteLLM-compatible /v1/chat/completions endpoint.
-// Local models are expected to return a JSON object matching the Result schema
-// as their response content — no envelope.
+// LiteLLMExecutor calls a LiteLLM-compatible /v1/chat/completions endpoint
+// and returns the raw assistant message text.
 type LiteLLMExecutor struct {
 	baseURL    string
 	apiKey     string
@@ -21,9 +20,12 @@ type LiteLLMExecutor struct {
 // NewLiteLLM creates a LiteLLMExecutor.
 // timeout applies to the full HTTP round-trip per call.
 func NewLiteLLM(baseURL, apiKey string, timeout time.Duration) *LiteLLMExecutor {
+	if timeout == 0 {
+		timeout = 120 * time.Second
+	}
 	return &LiteLLMExecutor{
-		baseURL: baseURL,
-		apiKey:  apiKey,
+		baseURL:    baseURL,
+		apiKey:     apiKey,
 		httpClient: &http.Client{Timeout: timeout},
 	}
 }
@@ -46,58 +48,50 @@ type litellmResponse struct {
 	Choices []litellmChoice `json:"choices"`
 }

-// Run dispatches req to the LiteLLM server and parses the Result from the
-// assistant message content. Returns an error on network failure, non-200
-// status, or unparseable/invalid JSON — all of which the Orchestrator treats
-// as automatic escalation triggers.
-func (e *LiteLLMExecutor) Run(ctx context.Context, req Request) (Result, error) {
+// Complete sends system+user messages to the given model and returns the raw
+// assistant text plus the milliseconds elapsed until response headers arrived.
+func (e *LiteLLMExecutor) Complete(ctx context.Context, model, system, user string) (string, int64, error) {
 	body := litellmRequest{
-		Model: req.Model,
+		Model: model,
 		Messages: []litellmMessage{
-			{Role: "system", Content: req.SkillPrompt},
-			{Role: "user", Content: req.TaskPrompt},
+			{Role: "system", Content: system},
+			{Role: "user", Content: user},
 		},
 	}

 	bodyBytes, err := json.Marshal(body)
 	if err != nil {
-		return Result{}, fmt.Errorf("litellm: marshal request: %w", err)
+		return "", 0, fmt.Errorf("litellm: marshal request: %w", err)
 	}

 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, e.baseURL+"/v1/chat/completions", bytes.NewReader(bodyBytes))
 	if err != nil {
-		return Result{}, fmt.Errorf("litellm: create request: %w", err)
+		return "", 0, fmt.Errorf("litellm: create request: %w", err)
 	}
 	httpReq.Header.Set("Content-Type", "application/json")
 	if e.apiKey != "" {
 		httpReq.Header.Set("Authorization", "Bearer "+e.apiKey)
 	}

+	t0 := time.Now()
 	resp, err := e.httpClient.Do(httpReq)
 	if err != nil {
-		return Result{}, fmt.Errorf("litellm: request failed: %w", err)
+		return "", 0, fmt.Errorf("litellm: request failed: %w", err)
 	}
 	defer resp.Body.Close() //nolint:errcheck
+	durationMs := time.Since(t0).Milliseconds()

 	if resp.StatusCode != http.StatusOK {
-		return Result{}, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
+		return "", 0, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
 	}

 	var chatResp litellmResponse
 	if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil {
-		return Result{}, fmt.Errorf("litellm: decode response: %w", err)
+		return "", 0, fmt.Errorf("litellm: decode response: %w", err)
 	}
 	if len(chatResp.Choices) == 0 {
-		return Result{}, fmt.Errorf("litellm: no choices in response")
+		return "", 0, fmt.Errorf("litellm: no choices in response")
 	}

-	content := chatResp.Choices[0].Message.Content
-	var result Result
-	if err := json.Unmarshal([]byte(content), &result); err != nil {
-		return Result{}, fmt.Errorf("litellm: parse result JSON: %w — content: %s", err, content)
-	}
-	if err := result.Validate(); err != nil {
-		return Result{}, fmt.Errorf("litellm: invalid result: %w", err)
-	}
-	return result, nil
+	return chatResp.Choices[0].Message.Content, durationMs, nil
 }
--- a/internal/exec/litellm_test.go
+++ b/internal/exec/litellm_test.go
@@ -13,23 +13,11 @@ import (
 	"github.com/stretchr/testify/require"
 )

-func validLiteLLMResult() iexec.Result {
-	return iexec.Result{
-		Status:    "pass",
-		Phase:     "review",
-		Skill:     "review",
-		ModelUsed: "ollama/devstral",
-		Message:   "looks good",
-	}
-}
-
-func chatResponseFor(t *testing.T, result iexec.Result) []byte {
+func chatResponse(t *testing.T, content string) []byte {
 	t.Helper()
-	content, err := json.Marshal(result)
-	require.NoError(t, err)
 	resp := map[string]any{
 		"choices": []map[string]any{
-			{"message": map[string]any{"role": "assistant", "content": string(content)}},
+			{"message": map[string]any{"role": "assistant", "content": content}},
 		},
 	}
 	data, err := json.Marshal(resp)
@@ -37,25 +25,21 @@ func chatResponseFor(t *testing.T, result iexec.Result) []byte {
 	return data
 }

-func TestLiteLLMParsesValidResult(t *testing.T) {
+func TestLiteLLMReturnsText(t *testing.T) {
 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		assert.Equal(t, "/v1/chat/completions", r.URL.Path)
 		assert.Equal(t, "application/json", r.Header.Get("Content-Type"))
 		w.Header().Set("Content-Type", "application/json")
 		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write(chatResponseFor(t, validLiteLLMResult()))
+		_, _ = w.Write(chatResponse(t, "here is my analysis"))
 	}))
 	defer srv.Close()

 	ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
-	result, err := ex.Run(context.Background(), iexec.Request{
-		SkillPrompt: "review rules",
-		TaskPrompt:  "review the code",
-		Model:       "ollama/devstral",
-	})
+	text, dur, err := ex.Complete(context.Background(), "ollama/devstral", "system prompt", "user prompt")
 	require.NoError(t, err)
-	assert.Equal(t, "pass", result.Status)
-	assert.Equal(t, "review", result.Skill)
+	assert.Equal(t, "here is my analysis", text)
+	assert.GreaterOrEqual(t, dur, int64(0))
 }

 func TestLiteLLMSendsAuthHeader(t *testing.T) {
@@ -63,12 +47,12 @@ func TestLiteLLMSendsAuthHeader(t *testing.T) {
 		assert.Equal(t, "Bearer secret", r.Header.Get("Authorization"))
 		w.Header().Set("Content-Type", "application/json")
 		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write(chatResponseFor(t, validLiteLLMResult()))
+		_, _ = w.Write(chatResponse(t, "ok"))
 	}))
 	defer srv.Close()

 	ex := iexec.NewLiteLLM(srv.URL, "secret", 5*time.Second)
-	_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t", SkillPrompt: "s"})
+	_, _, err := ex.Complete(context.Background(), "model", "sys", "user")
 	require.NoError(t, err)
 }

@@ -79,34 +63,28 @@ func TestLiteLLMErrorOnNonOKStatus(t *testing.T) {
 	defer srv.Close()

 	ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
-	_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"})
+	_, _, err := ex.Complete(context.Background(), "model", "sys", "user")
 	assert.ErrorContains(t, err, "503")
 }

-func TestLiteLLMErrorOnUnparsableJSON(t *testing.T) {
+func TestLiteLLMErrorOnEmptyChoices(t *testing.T) {
 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		w.Header().Set("Content-Type", "application/json")
 		w.WriteHeader(http.StatusOK)
-		resp := map[string]any{
-			"choices": []map[string]any{
-				{"message": map[string]any{"role": "assistant", "content": "not json at all"}},
-			},
-		}
-		data, _ := json.Marshal(resp)
-		_, _ = w.Write(data)
+		_, _ = w.Write([]byte(`{"choices":[]}`))
 	}))
 	defer srv.Close()

 	ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
-	_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"})
-	assert.Error(t, err)
+	_, _, err := ex.Complete(context.Background(), "model", "sys", "user")
+	assert.ErrorContains(t, err, "no choices")
 }

 func TestLiteLLMRespectsContextCancellation(t *testing.T) {
 	ctx, cancel := context.WithCancel(context.Background())
-	cancel() // Cancel immediately
+	cancel()

 	ex := iexec.NewLiteLLM("http://invalid.example.com", "", 1*time.Second)
-	_, err := ex.Run(ctx, iexec.Request{Model: "x", TaskPrompt: "t"})
+	_, _, err := ex.Complete(ctx, "model", "sys", "user")
 	assert.Error(t, err)
 }
--- a/internal/exec/orchestrator.go
+++ b/internal/exec/orchestrator.go
@@ -1,197 +0,0 @@
-package exec
-
-import (
-	"context"
-	"fmt"
-	"io"
-	"net/http"
-	"strings"
-	"time"
-)
-
-// ChainEntry is one tier in an escalation chain.
-type ChainEntry struct {
-	Model   string // e.g. "ollama/phi4", "claude-sonnet-4-6"
-	Tier    string // "local" | "subagent" | "managed"
-	IsCloud bool   // true for claude-* models; skips verifier call
-}
-
-// EntryFor builds a ChainEntry from a model name string.
-func EntryFor(model string) ChainEntry {
-	cloud := strings.HasPrefix(model, "claude-")
-	tier := "local"
-	if cloud {
-		tier = "subagent"
-	}
-	return ChainEntry{Model: model, Tier: tier, IsCloud: cloud}
-}
-
-// AttemptRecord captures the outcome of one tier attempt for session logging.
-type AttemptRecord struct {
-	Model      string
-	Tier       string
-	DurationMs int64
-	WarmStart  bool
-	Verdict    string // "accept" | "escalate" | "error"
-	Feedback   string
-}
-
-// VerifierFn is the interface the orchestrator uses to verify local output.
-type VerifierFn interface {
-	Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error)
-}
-
-// ExecutorRunFn is the signature of Executor.Run and LiteLLMExecutor.Run.
-type ExecutorRunFn func(ctx context.Context, req Request) (Result, error)
-
-// Orchestrator walks an escalation chain, delegating generation and verification.
-// It implements the ExecutorFn shape expected by skill handlers.
-type Orchestrator struct {
-	chain        []ChainEntry
-	localRun     ExecutorRunFn // for local (non-cloud) tiers; may be nil
-	cloudRun     ExecutorRunFn // for cloud tiers; may be nil
-	verifier     VerifierFn
-	llamaSwapURL string
-	attempts     *[]AttemptRecord
-}
-
-// NewOrchestrator creates an Orchestrator.
-// attempts is a pointer to a slice that will be appended to on each tier attempt.
-// Pass nil for localRun or cloudRun if no tiers of that type exist in the chain.
-func NewOrchestrator(
-	chain []ChainEntry,
-	localRun ExecutorRunFn,
-	cloudRun ExecutorRunFn,
-	verifier VerifierFn,
-	llamaSwapURL string,
-	attempts *[]AttemptRecord,
-) *Orchestrator {
-	return &Orchestrator{
-		chain:        chain,
-		localRun:     localRun,
-		cloudRun:     cloudRun,
-		verifier:     verifier,
-		llamaSwapURL: llamaSwapURL,
-		attempts:     attempts,
-	}
-}
-
-// Run walks the escalation chain and returns the first accepted result.
-// Satisfies the ExecutorFn signature: func(context.Context, Request) (Result, error).
-func (o *Orchestrator) Run(ctx context.Context, req Request) (Result, error) {
-	taskPrompt := req.TaskPrompt
-
-	for _, entry := range o.chain {
-		warm := o.probeWarm(entry.Model)
-		start := time.Now()
-
-		tierReq := req
-		tierReq.Model = entry.Model
-		tierReq.TaskPrompt = taskPrompt
-
-		if entry.IsCloud {
-			result, genErr := o.cloudRun(ctx, tierReq)
-			dur := time.Since(start).Milliseconds()
-			verdict := "accept"
-			if genErr != nil {
-				verdict = "error"
-			}
-			o.appendAttempt(AttemptRecord{
-				Model:      entry.Model,
-				Tier:       entry.Tier,
-				DurationMs: dur,
-				WarmStart:  warm,
-				Verdict:    verdict,
-			})
-			if genErr == nil {
-				return result, nil
-			}
-			continue
-		}
-
-		// Local tier.
-		result, genErr := o.localRun(ctx, tierReq)
-		dur := time.Since(start).Milliseconds()
-
-		if genErr != nil {
-			o.appendAttempt(AttemptRecord{
-				Model:      entry.Model,
-				Tier:       entry.Tier,
-				DurationMs: dur,
-				WarmStart:  warm,
-				Verdict:    "error",
-				Feedback:   genErr.Error(),
-			})
-			continue
-		}
-
-		verdict, verErr := o.verifier.Verify(ctx, req.SkillPrompt, taskPrompt, result)
-		if verErr != nil {
-			// Treat verifier failure as escalate (safe default).
-			o.appendAttempt(AttemptRecord{
-				Model:      entry.Model,
-				Tier:       entry.Tier,
-				DurationMs: dur,
-				WarmStart:  warm,
-				Verdict:    "escalate",
-				Feedback:   "verifier error: " + verErr.Error(),
-			})
-			continue
-		}
-
-		if verdict.Accept {
-			o.appendAttempt(AttemptRecord{
-				Model:      entry.Model,
-				Tier:       entry.Tier,
-				DurationMs: dur,
-				WarmStart:  warm,
-				Verdict:    "accept",
-			})
-			return result, nil
-		}
-
-		o.appendAttempt(AttemptRecord{
-			Model:      entry.Model,
-			Tier:       entry.Tier,
-			DurationMs: dur,
-			WarmStart:  warm,
-			Verdict:    "escalate",
-			Feedback:   verdict.Feedback,
-		})
-		// Inject verifier feedback into the next tier's task prompt.
-		taskPrompt = taskPrompt + "\n\nPrior attempt feedback: " + verdict.Feedback
-	}
-
-	return Result{}, fmt.Errorf("all tiers exhausted after %d attempt(s)", len(o.chain))
-}
-
-func (o *Orchestrator) appendAttempt(rec AttemptRecord) {
-	if o.attempts != nil {
-		*o.attempts = append(*o.attempts, rec)
-	}
-}
-
-// probeWarm checks whether the model is currently loaded in llama-swap.
-// Returns false on any error or if llamaSwapURL is empty.
-func (o *Orchestrator) probeWarm(model string) bool {
-	if o.llamaSwapURL == "" {
-		return false
-	}
-	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
-	defer cancel()
-
-	req, err := http.NewRequestWithContext(ctx, http.MethodGet, o.llamaSwapURL+"/v1/models", nil)
-	if err != nil {
-		return false
-	}
-	resp, err := http.DefaultClient.Do(req)
-	if err != nil {
-		return false
-	}
-	defer resp.Body.Close() //nolint:errcheck
-	body, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return false
-	}
-	return strings.Contains(string(body), model)
-}
--- a/internal/exec/orchestrator_test.go
+++ b/internal/exec/orchestrator_test.go
@@ -1,151 +0,0 @@
-package exec_test
-
-import (
-	"context"
-	"errors"
-	"testing"
-
-	iexec "github.com/mathiasbq/supervisor/internal/exec"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-)
-
-// stubRunFn returns preset results sequentially.
-type stubRunFn struct {
-	calls   []stubCall
-	callIdx int
-}
-
-type stubCall struct {
-	result iexec.Result
-	err    error
-}
-
-func (s *stubRunFn) Run(_ context.Context, _ iexec.Request) (iexec.Result, error) {
-	if s.callIdx >= len(s.calls) {
-		return iexec.Result{}, errors.New("unexpected call")
-	}
-	c := s.calls[s.callIdx]
-	s.callIdx++
-	return c.result, c.err
-}
-
-// stubVerifier returns preset verdicts sequentially.
-type stubVerifier struct {
-	verdicts []iexec.Verdict
-	idx      int
-}
-
-func (s *stubVerifier) Verify(_ context.Context, _, _ string, _ iexec.Result) (iexec.Verdict, error) {
-	if s.idx >= len(s.verdicts) {
-		return iexec.Verdict{}, errors.New("unexpected verify call")
-	}
-	v := s.verdicts[s.idx]
-	s.idx++
-	return v, nil
-}
-
-func okResult(skill string) iexec.Result {
-	return iexec.Result{Status: "pass", Phase: "review", Skill: skill, Message: "ok", ModelUsed: "m"}
-}
-
-func TestOrchestratorSingleLocalAccept(t *testing.T) {
-	local := &stubRunFn{calls: []stubCall{{result: okResult("review")}}}
-	verifier := &stubVerifier{verdicts: []iexec.Verdict{{Accept: true}}}
-
-	var attempts []iexec.AttemptRecord
-	orch := iexec.NewOrchestrator(
-		[]iexec.ChainEntry{{Model: "ollama/devstral", Tier: "local", IsCloud: false}},
-		local.Run, nil, verifier, "", &attempts,
-	)
-
-	result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
-	require.NoError(t, err)
-	assert.Equal(t, "pass", result.Status)
-	require.Len(t, attempts, 1)
-	assert.Equal(t, "local", attempts[0].Tier)
-	assert.Equal(t, "accept", attempts[0].Verdict)
-}
-
-func TestOrchestratorEscalatesOnVerifierReject(t *testing.T) {
-	local := &stubRunFn{calls: []stubCall{
-		{result: iexec.Result{Status: "fail", Phase: "review", Skill: "review", Message: "weak"}},
-		{result: okResult("review")},
-	}}
-	verifier := &stubVerifier{verdicts: []iexec.Verdict{
-		{Accept: false, Feedback: "missing line refs"},
-		{Accept: true},
-	}}
-
-	var attempts []iexec.AttemptRecord
-	orch := iexec.NewOrchestrator(
-		[]iexec.ChainEntry{
-			{Model: "ollama/devstral", Tier: "local", IsCloud: false},
-			{Model: "ollama/gemma4", Tier: "local", IsCloud: false},
-		},
-		local.Run, nil, verifier, "", &attempts,
-	)
-
-	result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
-	require.NoError(t, err)
-	assert.Equal(t, "pass", result.Status)
-	require.Len(t, attempts, 2)
-	assert.Equal(t, "escalate", attempts[0].Verdict)
-	assert.Equal(t, "missing line refs", attempts[0].Feedback)
-	assert.Equal(t, "accept", attempts[1].Verdict)
-}
-
-func TestOrchestratorEscalatesOnLocalError(t *testing.T) {
-	local := &stubRunFn{calls: []stubCall{
-		{err: errors.New("network failure")},
-		{result: okResult("review")},
-	}}
-	verifier := &stubVerifier{verdicts: []iexec.Verdict{{Accept: true}}}
-
-	var attempts []iexec.AttemptRecord
-	orch := iexec.NewOrchestrator(
-		[]iexec.ChainEntry{
-			{Model: "ollama/devstral", Tier: "local", IsCloud: false},
-			{Model: "ollama/gemma4", Tier: "local", IsCloud: false},
-		},
-		local.Run, nil, verifier, "", &attempts,
-	)
-
-	_, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
-	require.NoError(t, err)
-	require.Len(t, attempts, 2)
-	assert.Equal(t, "error", attempts[0].Verdict)
-	assert.Equal(t, "accept", attempts[1].Verdict)
-}
-
-func TestOrchestratorCloudTierSelfCertifies(t *testing.T) {
-	cloud := &stubRunFn{calls: []stubCall{{result: okResult("review")}}}
-	verifier := &stubVerifier{} // no verdicts — must not be called
-
-	var attempts []iexec.AttemptRecord
-	orch := iexec.NewOrchestrator(
-		[]iexec.ChainEntry{{Model: "claude-sonnet-4-6", Tier: "subagent", IsCloud: true}},
-		nil, cloud.Run, verifier, "", &attempts,
-	)
-
-	result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
-	require.NoError(t, err)
-	assert.Equal(t, "pass", result.Status)
-	require.Len(t, attempts, 1)
-	assert.Equal(t, "subagent", attempts[0].Tier)
-	assert.Equal(t, "accept", attempts[0].Verdict)
-	assert.Equal(t, 0, verifier.idx) // verifier never called
-}
-
-func TestOrchestratorAllTiersExhausted(t *testing.T) {
-	local := &stubRunFn{calls: []stubCall{{err: errors.New("unavailable")}}}
-
-	var attempts []iexec.AttemptRecord
-	orch := iexec.NewOrchestrator(
-		[]iexec.ChainEntry{{Model: "ollama/devstral", Tier: "local", IsCloud: false}},
-		local.Run, nil, &stubVerifier{}, "", &attempts,
-	)
-
-	_, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
-	assert.ErrorContains(t, err, "all tiers exhausted")
-}
--- a/internal/exec/result.go
+++ b/internal/exec/result.go
@@ -1,66 +0,0 @@
-package exec
-
-import (
-	"errors"
-	"strings"
-)
-
-// Result is the structured JSON output from every supervisor invocation.
-// The JSON schema constant is passed to claude via --json-schema so Claude
-// validates its own output before returning.
-type Result struct {
-	Status       string `json:"status"`        // pass | fail | error
-	Phase        string `json:"phase"`         // red | green | refactor | retrospective | review | debug | spec | trainer
-	Skill        string `json:"skill"`         // tdd | review | ...
-	FilePath     string `json:"file_path"`     // absolute path to generated file
-	RunnerOutput string `json:"runner_output"` // raw stdout+stderr from test runner
-	Verified     bool            `json:"verified"`           // based on exit code, never self-report
-	ModelUsed    string          `json:"model_used"`         // model name or "self"
-	Message      string          `json:"message"`            // one sentence summary
-	Attempts     []AttemptRecord `json:"attempts,omitempty"` // populated by orchestrator, not Claude
-}
-
-var validStatuses = map[string]bool{"pass": true, "fail": true, "error": true}
-var validPhases = map[string]bool{
-	"red":           true,
-	"green":         true,
-	"refactor":      true,
-	"retrospective": true,
-	"review":        true,
-	"debug":         true,
-	"spec":          true,
-	"trainer":       true,
-}
-
-func (r Result) Validate() error {
-	var errs []string
-	if !validStatuses[r.Status] {
-		errs = append(errs, "status must be pass|fail|error, got: "+r.Status)
-	}
-	if !validPhases[r.Phase] {
-		errs = append(errs, "phase must be one of red|green|refactor|retrospective|review|debug|spec|trainer, got: "+r.Phase)
-	}
-	if r.Skill == "" {
-		errs = append(errs, "skill is required")
-	}
-	if len(errs) > 0 {
-		return errors.New(strings.Join(errs, "; "))
-	}
-	return nil
-}
-
-// Schema is passed to claude --json-schema to enforce structured output.
-const Schema = `{
-  "type": "object",
-  "required": ["status","phase","skill","file_path","runner_output","verified","model_used","message"],
-  "properties": {
-    "status":        {"type": "string", "enum": ["pass","fail","error"]},
-    "phase":         {"type": "string"},
-    "skill":         {"type": "string"},
-    "file_path":     {"type": "string"},
-    "runner_output": {"type": "string"},
-    "verified":      {"type": "boolean"},
-    "model_used":    {"type": "string"},
-    "message":       {"type": "string"}
-  }
-}`
--- a/internal/exec/result_test.go
+++ b/internal/exec/result_test.go
@@ -1,79 +0,0 @@
-package exec_test
-
-import (
-	"encoding/json"
-	"testing"
-
-	"github.com/mathiasbq/supervisor/internal/exec"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-)
-
-func TestResultParsesValidJSON(t *testing.T) {
-	raw := `{
-		"status": "pass",
-		"phase": "red",
-		"skill": "tdd",
-		"file_path": "/tmp/foo_test.go",
-		"runner_output": "--- FAIL: TestFoo",
-		"verified": true,
-		"model_used": "self",
-		"message": "test fails as expected"
-	}`
-	var r exec.Result
-	require.NoError(t, json.Unmarshal([]byte(raw), &r))
-	assert.Equal(t, "pass", r.Status)
-	assert.Equal(t, "red", r.Phase)
-	assert.True(t, r.Verified)
-}
-
-func TestResultValidation(t *testing.T) {
-	tests := []struct {
-		name    string
-		result  exec.Result
-		wantErr bool
-	}{
-		{
-			name: "valid pass result",
-			result: exec.Result{
-				Status: "pass", Phase: "red", Skill: "tdd",
-				FilePath: "/tmp/x_test.go", RunnerOutput: "FAIL",
-				Verified: true, ModelUsed: "self", Message: "ok",
-			},
-			wantErr: false,
-		},
-		{
-			name:    "empty status",
-			result:  exec.Result{Phase: "red", Skill: "tdd"},
-			wantErr: true,
-		},
-		{
-			name:    "invalid status",
-			result:  exec.Result{Status: "unknown", Phase: "red", Skill: "tdd"},
-			wantErr: true,
-		},
-		{
-			name:    "invalid phase",
-			result:  exec.Result{Status: "pass", Phase: "bad", Skill: "tdd"},
-			wantErr: true,
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			err := tt.result.Validate()
-			if tt.wantErr {
-				assert.Error(t, err)
-			} else {
-				assert.NoError(t, err)
-			}
-		})
-	}
-}
-
-func TestValidateAcceptsAllPhases(t *testing.T) {
-	phases := []string{"red", "green", "refactor", "retrospective", "review", "debug", "spec", "trainer"}
-	for _, phase := range phases {
-		r := exec.Result{Status: "pass", Phase: phase, Skill: "test", ModelUsed: "self", Message: "ok"}
-		assert.NoError(t, r.Validate(), "phase %q should be valid", phase)
-	}
-}
--- a/internal/exec/verifier.go
+++ b/internal/exec/verifier.go
@@ -1,99 +0,0 @@
-package exec
-
-import (
-	"bytes"
-	"context"
-	"encoding/json"
-	"fmt"
-	"os"
-	"os/exec"
-	"time"
-)
-
-// Verdict is the output of a Claude verification call.
-type Verdict struct {
-	Accept   bool   `json:"accept"`
-	Feedback string `json:"feedback"` // empty when Accept is true
-}
-
-// Verifier runs a focused Claude call to judge local model output.
-type Verifier struct {
-	claudeBinary string
-	model        string
-	timeout      time.Duration
-}
-
-// NewVerifier creates a Verifier that calls claude with the given binary path and model.
-// Empty claudeBinary defaults to "claude". Zero timeout defaults to 30s.
-func NewVerifier(claudeBinary, model string, timeout time.Duration) *Verifier {
-	if claudeBinary == "" {
-		claudeBinary = "claude"
-	}
-	if timeout == 0 {
-		timeout = 30 * time.Second
-	}
-	return &Verifier{
-		claudeBinary: claudeBinary,
-		model:        model,
-		timeout:      timeout,
-	}
-}
-
-// Verify asks Claude whether output satisfies the skill discipline's iron laws.
-// Returns Verdict{Accept: true} to accept or Verdict{Accept: false, Feedback: "..."}
-// to escalate. Returns an error on subprocess failure or unparseable response.
-func (v *Verifier) Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error) {
-	ctx, cancel := context.WithTimeout(ctx, v.timeout)
-	defer cancel()
-
-	outputJSON, err := json.Marshal(output)
-	if err != nil {
-		return Verdict{}, fmt.Errorf("verifier: marshal output: %w", err)
-	}
-
-	prompt := fmt.Sprintf(`You are a quality verifier for an AI supervisor system.
-
-Given the skill discipline, the original task, and the generated output, decide whether the output satisfies the discipline's iron laws and output contract.
-
-Reply with JSON only — no other text:
-{"accept": true, "feedback": ""}
-or
-{"accept": false, "feedback": "<one sentence reason>"}
-
-## Skill discipline
-%s
-
-## Original task
-%s
-
-## Generated output
-%s`, skillPrompt, taskPrompt, string(outputJSON))
-
-	args := []string{
-		"--print",
-		"--permission-mode", "bypassPermissions",
-	}
-	if v.model != "" {
-		args = append(args, "--model", v.model)
-	}
-	args = append(args, prompt)
-
-	cmd := exec.CommandContext(ctx, v.claudeBinary, args...)
-	cmd.Env = os.Environ()
-	var stdout, stderr bytes.Buffer
-	cmd.Stdout = &stdout
-	cmd.Stderr = &stderr
-
-	if err := cmd.Run(); err != nil {
-		if ctx.Err() != nil {
-			return Verdict{}, fmt.Errorf("verifier: timeout after %s", v.timeout)
-		}
-		return Verdict{}, fmt.Errorf("verifier: claude exited with error: %w — stderr: %s", err, stderr.String())
-	}
-
-	var verdict Verdict
-	if err := json.Unmarshal(bytes.TrimSpace(stdout.Bytes()), &verdict); err != nil {
-		return Verdict{}, fmt.Errorf("verifier: parse verdict JSON: %w — raw: %s", err, stdout.String())
-	}
-	return verdict, nil
-}
--- a/internal/exec/verifier_test.go
+++ b/internal/exec/verifier_test.go
@@ -1,74 +0,0 @@
-package exec_test
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"os"
-	"path/filepath"
-	"testing"
-	"time"
-
-	iexec "github.com/mathiasbq/supervisor/internal/exec"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-)
-
-func fakeVerifierClaude(t *testing.T, verdict iexec.Verdict) string {
-	t.Helper()
-	data, err := json.Marshal(verdict)
-	require.NoError(t, err)
-	dir := t.TempDir()
-	script := filepath.Join(dir, "claude")
-	content := fmt.Sprintf("#!/bin/sh\necho '%s'\n", string(data))
-	require.NoError(t, os.WriteFile(script, []byte(content), 0755))
-	return script
-}
-
-func TestVerifierAccepts(t *testing.T) {
-	claude := fakeVerifierClaude(t, iexec.Verdict{Accept: true, Feedback: ""})
-	v := iexec.NewVerifier(claude, "claude-sonnet-4-6", 5*time.Second)
-
-	verdict, err := v.Verify(context.Background(), "skill rules", "do the task", iexec.Result{
-		Status: "pass", Phase: "review", Skill: "review", Message: "ok",
-	})
-	require.NoError(t, err)
-	assert.True(t, verdict.Accept)
-	assert.Empty(t, verdict.Feedback)
-}
-
-func TestVerifierEscalates(t *testing.T) {
-	claude := fakeVerifierClaude(t, iexec.Verdict{Accept: false, Feedback: "missing line references"})
-	v := iexec.NewVerifier(claude, "claude-sonnet-4-6", 5*time.Second)
-
-	verdict, err := v.Verify(context.Background(), "skill rules", "do the task", iexec.Result{
-		Status: "pass", Phase: "review", Skill: "review", Message: "incomplete",
-	})
-	require.NoError(t, err)
-	assert.False(t, verdict.Accept)
-	assert.Equal(t, "missing line references", verdict.Feedback)
-}
-
-func TestVerifierErrorOnUnparsableOutput(t *testing.T) {
-	dir := t.TempDir()
-	script := filepath.Join(dir, "claude")
-	require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\necho 'not json'\n"), 0755))
-
-	v := iexec.NewVerifier(script, "claude-sonnet-4-6", 5*time.Second)
-	_, err := v.Verify(context.Background(), "rules", "task", iexec.Result{
-		Status: "pass", Phase: "review", Skill: "review", Message: "ok",
-	})
-	assert.Error(t, err)
-}
-
-func TestVerifierErrorOnNonZeroExit(t *testing.T) {
-	dir := t.TempDir()
-	script := filepath.Join(dir, "claude")
-	require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\nexit 1\n"), 0755))
-
-	v := iexec.NewVerifier(script, "claude-sonnet-4-6", 5*time.Second)
-	_, err := v.Verify(context.Background(), "rules", "task", iexec.Result{
-		Status: "pass", Phase: "review", Skill: "review", Message: "ok",
-	})
-	assert.Error(t, err)
-}