// ---- internal/exec/executor.go (package exec) ----
// Imports: bytes, context, encoding/json, fmt, os/exec, strings, time.

type (
	// Config holds executor configuration.
	Config struct {
		// ClaudeBinary is the path to the claude binary; defaults to "claude".
		ClaudeBinary string
		// SystemPrompt is the contents of the supervisor CLAUDE.md.
		SystemPrompt string
		// Timeout is the per-invocation timeout; default 120s.
		Timeout time.Duration
		// LiteLLMBaseURL is passed to Claude so it can delegate to Ollama.
		LiteLLMBaseURL string
		// LiteLLMAPIKey is passed to Claude for LiteLLM auth.
		LiteLLMAPIKey string
	}

	// Request is the input to a single supervisor invocation.
	Request struct {
		// SkillPrompt is the skill-specific discipline (e.g. tdd.md contents).
		SkillPrompt string
		// TaskPrompt is the specific task (phase, project_root, spec, model).
		TaskPrompt string
		// Model is the resolved model name, passed in the task prompt.
		Model string
		// Tools is the comma-separated list of allowed tools; default "Bash,Read,Write".
		Tools string
	}
)

// Executor spawns a claude instance and captures its structured JSON output.
+type Executor struct { + cfg Config +} + +func New(cfg Config) *Executor { + if cfg.ClaudeBinary == "" { + cfg.ClaudeBinary = "claude" + } + if cfg.Timeout == 0 { + cfg.Timeout = 120 * time.Second + } + return &Executor{cfg: cfg} +} + +func (e *Executor) Run(ctx context.Context, req Request) (Result, error) { + ctx, cancel := context.WithTimeout(ctx, e.cfg.Timeout) + defer cancel() + + tools := req.Tools + if tools == "" { + tools = "Bash,Read,Write" + } + + // Build the full prompt: system rules + skill rules + infra context + task + litellmCtx := fmt.Sprintf( + "LITELLM_BASE_URL: %s\nLITELLM_API_KEY: %s", + e.cfg.LiteLLMBaseURL, e.cfg.LiteLLMAPIKey, + ) + prompt := strings.Join([]string{ + e.cfg.SystemPrompt, + "---", + req.SkillPrompt, + "---", + litellmCtx, + "---", + req.TaskPrompt, + }, "\n\n") + + args := []string{ + "--print", + "--bare", + "--permission-mode", "bypassPermissions", + "--tools", tools, + "--json-schema", Schema, + "--output-format", "text", + prompt, + } + + cmd := exec.CommandContext(ctx, e.cfg.ClaudeBinary, args...) 
+ var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + if ctx.Err() != nil { + return Result{}, fmt.Errorf("timeout after %s", e.cfg.Timeout) + } + return Result{}, fmt.Errorf("claude exited with error: %w — stderr: %s", err, stderr.String()) + } + + var r Result + if err := json.Unmarshal(stdout.Bytes(), &r); err != nil { + return Result{}, fmt.Errorf("parse result JSON: %w — raw output: %s", err, stdout.String()) + } + if err := r.Validate(); err != nil { + return Result{}, fmt.Errorf("invalid result: %w", err) + } + return r, nil +} diff --git a/internal/exec/executor_test.go b/internal/exec/executor_test.go new file mode 100644 index 0000000..ee6cf2c --- /dev/null +++ b/internal/exec/executor_test.go @@ -0,0 +1,75 @@ +package exec_test + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// fakeClaudePath writes a shell script that prints fixed output and returns its path. 
+func fakeClaudePath(t *testing.T, output string, exitCode int) string { + t.Helper() + dir := t.TempDir() + script := filepath.Join(dir, "claude") + var content string + if exitCode != 0 { + content = "#!/bin/sh\necho 'error' >&2\nexit 1\n" + } else { + content = "#!/bin/sh\necho '" + output + "'\n" + } + require.NoError(t, os.WriteFile(script, []byte(content), 0755)) + return script +} + +func TestExecutorParsesValidResult(t *testing.T) { + validJSON := `{"status":"pass","phase":"red","skill":"tdd","file_path":"/tmp/x_test.go","runner_output":"FAIL","verified":true,"model_used":"self","message":"ok"}` + claude := fakeClaudePath(t, validJSON, 0) + + ex := iexec.New(iexec.Config{ + ClaudeBinary: claude, + SystemPrompt: "you are a supervisor", + Timeout: 5 * time.Second, + }) + + result, err := ex.Run(context.Background(), iexec.Request{ + SkillPrompt: "tdd rules", + TaskPrompt: "run red phase", + }) + require.NoError(t, err) + assert.Equal(t, "pass", result.Status) + assert.True(t, result.Verified) +} + +func TestExecutorReturnsErrorOnNonZeroExit(t *testing.T) { + claude := fakeClaudePath(t, "", 1) + + ex := iexec.New(iexec.Config{ + ClaudeBinary: claude, + SystemPrompt: "you are a supervisor", + Timeout: 5 * time.Second, + }) + + _, err := ex.Run(context.Background(), iexec.Request{TaskPrompt: "fail"}) + assert.Error(t, err) +} + +func TestExecutorTimesOut(t *testing.T) { + dir := t.TempDir() + script := filepath.Join(dir, "claude") + require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\nsleep 60\n"), 0755)) + + ex := iexec.New(iexec.Config{ + ClaudeBinary: script, + SystemPrompt: "you are a supervisor", + Timeout: 100 * time.Millisecond, + }) + + _, err := ex.Run(context.Background(), iexec.Request{TaskPrompt: "slow"}) + assert.ErrorContains(t, err, "timeout") +}