feat: add executor that spawns claude and parses JSON result

2026-04-17 07:39:26 +02:00
parent 4c00b55ca5
commit 91b98aed34
2 changed files with 174 additions and 0 deletions
--- a/internal/exec/executor.go
+++ b/internal/exec/executor.go
@@ -0,0 +1,99 @@
+package exec
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"os/exec"
+	"strings"
+	"time"
+)
+
+// Config carries the settings an Executor applies to every invocation.
+type Config struct {
+	// ClaudeBinary is the path to the claude binary; New falls back to
+	// "claude" when it is empty.
+	ClaudeBinary string
+	// SystemPrompt holds the contents of the supervisor CLAUDE.md.
+	SystemPrompt string
+	// Timeout bounds a single invocation; New falls back to 120s when it is
+	// zero.
+	Timeout time.Duration
+	// LiteLLMBaseURL is handed to Claude so it can delegate to Ollama.
+	LiteLLMBaseURL string
+	// LiteLLMAPIKey is handed to Claude for LiteLLM auth.
+	LiteLLMAPIKey string
+}
+
+// Request describes one supervisor invocation.
+type Request struct {
+	// SkillPrompt is the skill-specific discipline (e.g. tdd.md contents).
+	SkillPrompt string
+	// TaskPrompt is the specific task (phase, project_root, spec, model).
+	TaskPrompt string
+	// Model is the resolved model name, passed in the task prompt.
+	Model string
+	// Tools is the comma-separated list of allowed tools; the default is
+	// "Bash,Read,Write".
+	Tools string
+}
+
+// Executor launches a claude process and collects the structured JSON it
+// prints.
+type Executor struct {
+	cfg Config // settings captured at construction time
+}
+
+// New returns an Executor for cfg, filling in defaults for unset fields: an
+// empty ClaudeBinary becomes "claude" and a non-positive Timeout becomes 120
+// seconds. cfg is taken by value, so the caller's copy is not modified.
+func New(cfg Config) *Executor {
+	if cfg.ClaudeBinary == "" {
+		cfg.ClaudeBinary = "claude"
+	}
+	// A negative timeout would hand Run an already-expired context, so it is
+	// treated the same as "unset".
+	if cfg.Timeout <= 0 {
+		cfg.Timeout = 120 * time.Second
+	}
+	return &Executor{cfg: cfg}
+}
+
+// Run performs one claude invocation for req and returns the parsed Result.
+//
+// The prompt is the configured system prompt, the skill prompt, the LiteLLM
+// connection details and the task prompt, joined with "---" separators. The
+// process is bounded by the configured timeout as well as by ctx. Its stdout
+// must be a single JSON document that unmarshals into Result and passes
+// Result.Validate.
+//
+// Errors returned:
+//   - "timeout after ..." wrapping context.DeadlineExceeded when the deadline
+//     passes;
+//   - an error wrapping the context error when ctx is canceled;
+//   - the process error plus captured stderr when claude cannot be started or
+//     exits unsuccessfully;
+//   - a parse or validation error when stdout is not a usable Result.
+func (e *Executor) Run(ctx context.Context, req Request) (Result, error) {
+	// Upper bound on how long Wait may keep draining stdout/stderr once the
+	// process has been killed. Without it, a descendant of claude that
+	// inherited the pipes keeps Run blocked long after the deadline.
+	const pipeDrainDelay = 2 * time.Second
+
+	ctx, cancel := context.WithTimeout(ctx, e.cfg.Timeout)
+	defer cancel()
+
+	tools := req.Tools
+	if tools == "" {
+		tools = "Bash,Read,Write"
+	}
+
+	// Build the full prompt: system rules + skill rules + infra context + task.
+	// NOTE(review): the LiteLLM API key travels inside the prompt argument and
+	// is therefore part of the child's argv — confirm that is acceptable.
+	litellmCtx := fmt.Sprintf(
+		"LITELLM_BASE_URL: %s\nLITELLM_API_KEY: %s",
+		e.cfg.LiteLLMBaseURL, e.cfg.LiteLLMAPIKey,
+	)
+	prompt := strings.Join([]string{
+		e.cfg.SystemPrompt,
+		"---",
+		req.SkillPrompt,
+		"---",
+		litellmCtx,
+		"---",
+		req.TaskPrompt,
+	}, "\n\n")
+
+	args := []string{
+		"--print",
+		"--bare",
+		"--permission-mode", "bypassPermissions",
+		"--tools", tools,
+		"--json-schema", Schema,
+		"--output-format", "text",
+		prompt,
+	}
+
+	cmd := exec.CommandContext(ctx, e.cfg.ClaudeBinary, args...)
+	cmd.WaitDelay = pipeDrainDelay
+	var stdout, stderr bytes.Buffer
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+
+	if err := cmd.Run(); err != nil {
+		// Report context expiry/cancellation in preference to the raw process
+		// error ("signal: killed"), and keep the two cases distinguishable.
+		if ctxErr := ctx.Err(); ctxErr != nil {
+			if ctxErr == context.DeadlineExceeded {
+				return Result{}, fmt.Errorf("timeout after %s: %w", e.cfg.Timeout, ctxErr)
+			}
+			return Result{}, fmt.Errorf("claude invocation canceled: %w", ctxErr)
+		}
+		return Result{}, fmt.Errorf("claude exited with error: %w — stderr: %s", err, stderr.String())
+	}
+
+	var r Result
+	if err := json.Unmarshal(stdout.Bytes(), &r); err != nil {
+		return Result{}, fmt.Errorf("parse result JSON: %w — raw output: %s", err, stdout.String())
+	}
+	if err := r.Validate(); err != nil {
+		return Result{}, fmt.Errorf("invalid result: %w", err)
+	}
+	return r, nil
+}
--- a/internal/exec/executor_test.go
+++ b/internal/exec/executor_test.go
@@ -0,0 +1,75 @@
+package exec_test
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+
+	iexec "github.com/mathiasbq/supervisor/internal/exec"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// fakeClaudePath writes a shell script that prints fixed output and returns its path.
+func fakeClaudePath(t *testing.T, output string, exitCode int) string {
+	t.Helper()
+	dir := t.TempDir()
+	script := filepath.Join(dir, "claude")
+	var content string
+	if exitCode != 0 {
+		content = "#!/bin/sh\necho 'error' >&2\nexit 1\n"
+	} else {
+		content = "#!/bin/sh\necho '" + output + "'\n"
+	}
+	require.NoError(t, os.WriteFile(script, []byte(content), 0755))
+	return script
+}
+
+func TestExecutorParsesValidResult(t *testing.T) {
+	validJSON := `{"status":"pass","phase":"red","skill":"tdd","file_path":"/tmp/x_test.go","runner_output":"FAIL","verified":true,"model_used":"self","message":"ok"}`
+	claude := fakeClaudePath(t, validJSON, 0)
+
+	ex := iexec.New(iexec.Config{
+		ClaudeBinary: claude,
+		SystemPrompt: "you are a supervisor",
+		Timeout:      5 * time.Second,
+	})
+
+	result, err := ex.Run(context.Background(), iexec.Request{
+		SkillPrompt: "tdd rules",
+		TaskPrompt:  "run red phase",
+	})
+	require.NoError(t, err)
+	assert.Equal(t, "pass", result.Status)
+	assert.True(t, result.Verified)
+}
+
+// TestExecutorReturnsErrorOnNonZeroExit checks that Run reports an error when
+// the binary exits with a failure status.
+func TestExecutorReturnsErrorOnNonZeroExit(t *testing.T) {
+	cfg := iexec.Config{
+		ClaudeBinary: fakeClaudePath(t, "", 1),
+		SystemPrompt: "you are a supervisor",
+		Timeout:      5 * time.Second,
+	}
+	req := iexec.Request{TaskPrompt: "fail"}
+
+	_, runErr := iexec.New(cfg).Run(context.Background(), req)
+	assert.Error(t, runErr)
+}
+
+// TestExecutorTimesOut checks that Run gives up with a timeout error, and does
+// so promptly, when the binary outlives the configured Timeout.
+func TestExecutorTimesOut(t *testing.T) {
+	dir := t.TempDir()
+	script := filepath.Join(dir, "claude")
+	// exec replaces the shell with sleep, so the process that gets killed on
+	// timeout is the one holding stdout/stderr. A plain "sleep 60" would leave
+	// a child behind that keeps the pipes open for the full minute.
+	require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\nexec sleep 60\n"), 0755))
+
+	ex := iexec.New(iexec.Config{
+		ClaudeBinary: script,
+		SystemPrompt: "you are a supervisor",
+		Timeout:      100 * time.Millisecond,
+	})
+
+	start := time.Now()
+	_, err := ex.Run(context.Background(), iexec.Request{TaskPrompt: "slow"})
+	elapsed := time.Since(start)
+
+	assert.ErrorContains(t, err, "timeout")
+	// Generous bound: far above the 100ms timeout, far below the 60s sleep.
+	if elapsed > 10*time.Second {
+		t.Errorf("Run returned after %s; timeout was not enforced promptly", elapsed)
+	}
+}