Compare commits
8 Commits
76f195de2a
...
6328766c7f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6328766c7f | ||
|
|
f1deedd39d | ||
|
|
5cb272a869 | ||
|
|
e96b39a812 | ||
|
|
5db5b33cd7 | ||
|
|
a32457b5bc | ||
|
|
e0be5f0f98 | ||
|
|
6d410b810b |
@@ -84,11 +84,26 @@ func main() {
|
|||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
executor := iexec.New(iexec.Config{
|
claudeExec := iexec.New(iexec.Config{
|
||||||
SystemPrompt: string(systemPrompt),
|
SystemPrompt: string(systemPrompt),
|
||||||
LiteLLMBaseURL: cfg.LiteLLMBaseURL,
|
LiteLLMBaseURL: cfg.LiteLLMBaseURL,
|
||||||
LiteLLMAPIKey: cfg.LiteLLMAPIKey,
|
LiteLLMAPIKey: cfg.LiteLLMAPIKey,
|
||||||
})
|
})
|
||||||
|
litellmExec := iexec.NewLiteLLM(cfg.LiteLLMBaseURL, cfg.LiteLLMAPIKey, 0)
|
||||||
|
verifier := iexec.NewVerifier("", models.Verifier(), 0)
|
||||||
|
|
||||||
|
buildOrch := func(skill string) func(ctx context.Context, req iexec.Request) (iexec.Result, error) {
|
||||||
|
return func(ctx context.Context, req iexec.Request) (iexec.Result, error) {
|
||||||
|
rawChain := models.ChainFor(skill, req.Model)
|
||||||
|
chain := make([]iexec.ChainEntry, len(rawChain))
|
||||||
|
for i, m := range rawChain {
|
||||||
|
chain[i] = iexec.EntryFor(m)
|
||||||
|
}
|
||||||
|
attempts := make([]iexec.AttemptRecord, 0, len(chain))
|
||||||
|
orch := iexec.NewOrchestrator(chain, litellmExec.Run, claudeExec.Run, verifier, models.LlamaSwapURL(), &attempts)
|
||||||
|
return orch.Run(ctx, req)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
tierFn := func(ctx context.Context) tier.Info {
|
tierFn := func(ctx context.Context) tier.Info {
|
||||||
return tier.Detect(ctx, "https://api.anthropic.com", cfg.LiteLLMBaseURL)
|
return tier.Detect(ctx, "https://api.anthropic.com", cfg.LiteLLMBaseURL)
|
||||||
@@ -98,8 +113,8 @@ func main() {
|
|||||||
reg.Register(tdd.New(tdd.Config{
|
reg.Register(tdd.New(tdd.Config{
|
||||||
SystemPrompt: string(systemPrompt),
|
SystemPrompt: string(systemPrompt),
|
||||||
SkillPrompt: string(tddPrompt),
|
SkillPrompt: string(tddPrompt),
|
||||||
DefaultModel: models.Resolve("tdd", ""),
|
DefaultModel: models.ChainFor("tdd", "")[0],
|
||||||
ExecutorFn: executor.Run,
|
ExecutorFn: buildOrch("tdd"),
|
||||||
SessionsDir: cfg.SessionsDir,
|
SessionsDir: cfg.SessionsDir,
|
||||||
}))
|
}))
|
||||||
reg.Register(brain.New(brain.Config{
|
reg.Register(brain.New(brain.Config{
|
||||||
@@ -113,33 +128,33 @@ func main() {
|
|||||||
}))
|
}))
|
||||||
reg.Register(retrospective.New(retrospective.Config{
|
reg.Register(retrospective.New(retrospective.Config{
|
||||||
SkillPrompt: string(retroPrompt),
|
SkillPrompt: string(retroPrompt),
|
||||||
DefaultModel: models.Resolve("retrospective", ""),
|
DefaultModel: models.ChainFor("retrospective", "")[0],
|
||||||
SessionsDir: cfg.SessionsDir,
|
SessionsDir: cfg.SessionsDir,
|
||||||
ExecutorFn: executor.Run,
|
ExecutorFn: buildOrch("retrospective"),
|
||||||
}))
|
}))
|
||||||
reg.Register(review.New(review.Config{
|
reg.Register(review.New(review.Config{
|
||||||
SkillPrompt: string(reviewPrompt),
|
SkillPrompt: string(reviewPrompt),
|
||||||
DefaultModel: models.Resolve("review", ""),
|
DefaultModel: models.ChainFor("review", "")[0],
|
||||||
ExecutorFn: executor.Run,
|
ExecutorFn: buildOrch("review"),
|
||||||
SessionsDir: cfg.SessionsDir,
|
SessionsDir: cfg.SessionsDir,
|
||||||
}))
|
}))
|
||||||
reg.Register(skilldebug.New(skilldebug.Config{
|
reg.Register(skilldebug.New(skilldebug.Config{
|
||||||
SkillPrompt: string(debugPrompt),
|
SkillPrompt: string(debugPrompt),
|
||||||
DefaultModel: models.Resolve("debug", ""),
|
DefaultModel: models.ChainFor("debug", "")[0],
|
||||||
ExecutorFn: executor.Run,
|
ExecutorFn: buildOrch("debug"),
|
||||||
SessionsDir: cfg.SessionsDir,
|
SessionsDir: cfg.SessionsDir,
|
||||||
}))
|
}))
|
||||||
reg.Register(spec.New(spec.Config{
|
reg.Register(spec.New(spec.Config{
|
||||||
SkillPrompt: string(specPrompt),
|
SkillPrompt: string(specPrompt),
|
||||||
DefaultModel: models.Resolve("spec", ""),
|
DefaultModel: models.ChainFor("spec", "")[0],
|
||||||
ExecutorFn: executor.Run,
|
ExecutorFn: buildOrch("spec"),
|
||||||
SessionsDir: cfg.SessionsDir,
|
SessionsDir: cfg.SessionsDir,
|
||||||
}))
|
}))
|
||||||
reg.Register(trainer.New(trainer.Config{
|
reg.Register(trainer.New(trainer.Config{
|
||||||
ReaderPrompt: string(trainerReaderPrompt),
|
ReaderPrompt: string(trainerReaderPrompt),
|
||||||
WriterPrompt: string(trainerWriterPrompt),
|
WriterPrompt: string(trainerWriterPrompt),
|
||||||
DefaultModel: models.Resolve("trainer", ""),
|
DefaultModel: models.ChainFor("trainer", "")[0],
|
||||||
ExecutorFn: executor.Run,
|
ExecutorFn: buildOrch("trainer"),
|
||||||
SessionsDir: cfg.SessionsDir,
|
SessionsDir: cfg.SessionsDir,
|
||||||
BrainDir: cfg.BrainDir,
|
BrainDir: cfg.BrainDir,
|
||||||
}))
|
}))
|
||||||
|
|||||||
@@ -1,13 +1,41 @@
|
|||||||
# Model routing table — three-layer priority:
|
# Model routing chains — three-layer priority:
|
||||||
# 1. model param in MCP tool call (caller override)
|
# 1. model param in MCP tool call (caller override — collapses to single entry, no escalation)
|
||||||
# 2. per-skill entry here
|
# 2. per-skill chain here
|
||||||
# 3. default (fallback)
|
# 3. default_chain fallback
|
||||||
default: ollama/qwen3-coder-30b-tuned
|
|
||||||
|
verifier: claude-sonnet-4-6 # fixed verifier for all local tiers
|
||||||
|
|
||||||
|
llama_swap_url: http://koala:8080 # for warm-state probing
|
||||||
|
|
||||||
|
default_chain:
|
||||||
|
- ollama/qwen3-coder-30b-tuned
|
||||||
|
- claude-sonnet-4-6
|
||||||
|
|
||||||
skills:
|
skills:
|
||||||
tdd: ollama/qwen3-coder-30b-tuned
|
tdd:
|
||||||
review: ollama/devstral-tuned
|
chain:
|
||||||
debug: ollama/deepseek-r1-tuned
|
- ollama/qwen3-coder-30b-tuned
|
||||||
retrospective: ollama/qwen3-coder-30b-tuned
|
- claude-sonnet-4-6
|
||||||
spec: ollama/qwen3-coder-30b-tuned
|
review:
|
||||||
trainer: ollama/qwen3-coder-30b-tuned
|
chain:
|
||||||
|
- ollama/devstral-tuned
|
||||||
|
- ollama/gemma4
|
||||||
|
- claude-sonnet-4-6
|
||||||
|
debug:
|
||||||
|
chain:
|
||||||
|
- ollama/deepseek-r1-tuned
|
||||||
|
- claude-sonnet-4-6
|
||||||
|
spec:
|
||||||
|
chain:
|
||||||
|
- ollama/phi4
|
||||||
|
- ollama/gemma4
|
||||||
|
- claude-sonnet-4-6
|
||||||
|
- claude-opus-4-6
|
||||||
|
retrospective:
|
||||||
|
chain:
|
||||||
|
- ollama/qwen3-coder-30b-tuned
|
||||||
|
- claude-sonnet-4-6
|
||||||
|
trainer:
|
||||||
|
chain:
|
||||||
|
- ollama/qwen3-coder-30b-tuned
|
||||||
|
- claude-sonnet-4-6
|
||||||
|
|||||||
@@ -7,9 +7,15 @@ import (
|
|||||||
"gopkg.in/yaml.v3"
|
"gopkg.in/yaml.v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type skillChain struct {
|
||||||
|
Chain []string `yaml:"chain"`
|
||||||
|
}
|
||||||
|
|
||||||
type modelsFile struct {
|
type modelsFile struct {
|
||||||
Default string `yaml:"default"`
|
Verifier string `yaml:"verifier"`
|
||||||
Skills map[string]string `yaml:"skills"`
|
LlamaSwapURL string `yaml:"llama_swap_url"`
|
||||||
|
DefaultChain []string `yaml:"default_chain"`
|
||||||
|
Skills map[string]skillChain `yaml:"skills"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type Models struct {
|
type Models struct {
|
||||||
@@ -28,16 +34,23 @@ func LoadModels(path string) (Models, error) {
|
|||||||
return Models{data: f}, nil
|
return Models{data: f}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Resolve returns the model for a skill, respecting three-layer priority:
|
// Verifier returns the model name to use for all local-tier output verification.
|
||||||
// 1. override (from MCP call) — highest
|
func (m Models) Verifier() string { return m.data.Verifier }
|
||||||
// 2. per-skill default from models.yaml
|
|
||||||
// 3. global default
|
// LlamaSwapURL returns the llama-swap base URL for warm-state probing.
|
||||||
func (m Models) Resolve(skill, override string) string {
|
func (m Models) LlamaSwapURL() string { return m.data.LlamaSwapURL }
|
||||||
|
|
||||||
|
// ChainFor returns the ordered list of model names for a skill.
|
||||||
|
// If override is non-empty, returns a single-entry chain (no escalation).
|
||||||
|
// Falls back to default_chain when the skill has no explicit entry.
|
||||||
|
func (m Models) ChainFor(skill, override string) []string {
|
||||||
if override != "" {
|
if override != "" {
|
||||||
return override
|
return []string{override}
|
||||||
}
|
}
|
||||||
if model, ok := m.data.Skills[skill]; ok {
|
if sc, ok := m.data.Skills[skill]; ok && len(sc.Chain) > 0 {
|
||||||
return model
|
return sc.Chain
|
||||||
}
|
}
|
||||||
return m.data.Default
|
out := make([]string, len(m.data.DefaultChain))
|
||||||
|
copy(out, m.data.DefaultChain)
|
||||||
|
return out
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,35 +10,71 @@ import (
|
|||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestModelsResolve(t *testing.T) {
|
const testYAML = `
|
||||||
yaml := `
|
verifier: claude-sonnet-4-6
|
||||||
default: ollama/default-model
|
llama_swap_url: http://koala:8080
|
||||||
|
|
||||||
|
default_chain:
|
||||||
|
- ollama/qwen3-coder-30b-tuned
|
||||||
|
- claude-sonnet-4-6
|
||||||
|
|
||||||
skills:
|
skills:
|
||||||
tdd: ollama/qwen3-coder-30b-tuned
|
review:
|
||||||
review: ollama/devstral-tuned
|
chain:
|
||||||
|
- ollama/devstral-tuned
|
||||||
|
- ollama/gemma4
|
||||||
|
- claude-sonnet-4-6
|
||||||
|
spec:
|
||||||
|
chain:
|
||||||
|
- ollama/phi4
|
||||||
|
- claude-opus-4-6
|
||||||
`
|
`
|
||||||
|
|
||||||
|
func writeModels(t *testing.T, content string) string {
|
||||||
|
t.Helper()
|
||||||
f := filepath.Join(t.TempDir(), "models.yaml")
|
f := filepath.Join(t.TempDir(), "models.yaml")
|
||||||
require.NoError(t, os.WriteFile(f, []byte(yaml), 0644))
|
require.NoError(t, os.WriteFile(f, []byte(content), 0644))
|
||||||
|
return f
|
||||||
m, err := config.LoadModels(f)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
assert.Equal(t, "ollama/qwen3-coder-30b-tuned", m.Resolve("tdd", ""))
|
|
||||||
assert.Equal(t, "ollama/devstral-tuned", m.Resolve("review", ""))
|
|
||||||
assert.Equal(t, "ollama/default-model", m.Resolve("unknown", ""))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestModelsOverride(t *testing.T) {
|
func TestModelsVerifier(t *testing.T) {
|
||||||
yaml := `
|
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||||
default: ollama/default-model
|
require.NoError(t, err)
|
||||||
skills:
|
assert.Equal(t, "claude-sonnet-4-6", m.Verifier())
|
||||||
tdd: ollama/qwen3-coder-30b-tuned
|
}
|
||||||
`
|
|
||||||
f := filepath.Join(t.TempDir(), "models.yaml")
|
|
||||||
require.NoError(t, os.WriteFile(f, []byte(yaml), 0644))
|
|
||||||
|
|
||||||
m, err := config.LoadModels(f)
|
func TestModelsLlamaSwapURL(t *testing.T) {
|
||||||
|
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "http://koala:8080", m.LlamaSwapURL())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestModelsChainForSkillOverride(t *testing.T) {
|
||||||
|
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
assert.Equal(t, "anthropic/claude-sonnet-4-6", m.Resolve("tdd", "anthropic/claude-sonnet-4-6"))
|
chain := m.ChainFor("review", "")
|
||||||
|
require.Len(t, chain, 3)
|
||||||
|
assert.Equal(t, "ollama/devstral-tuned", chain[0])
|
||||||
|
assert.Equal(t, "ollama/gemma4", chain[1])
|
||||||
|
assert.Equal(t, "claude-sonnet-4-6", chain[2])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestModelsChainForDefaultFallback(t *testing.T) {
|
||||||
|
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
chain := m.ChainFor("trainer", "") // not in skills map
|
||||||
|
require.Len(t, chain, 2)
|
||||||
|
assert.Equal(t, "ollama/qwen3-coder-30b-tuned", chain[0])
|
||||||
|
assert.Equal(t, "claude-sonnet-4-6", chain[1])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestModelsChainForCallerOverride(t *testing.T) {
|
||||||
|
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
chain := m.ChainFor("review", "claude-opus-4-6")
|
||||||
|
require.Len(t, chain, 1)
|
||||||
|
assert.Equal(t, "claude-opus-4-6", chain[0])
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -72,8 +72,11 @@ func (e *Executor) Run(ctx context.Context, req Request) (Result, error) {
|
|||||||
"--tools", tools,
|
"--tools", tools,
|
||||||
"--json-schema", Schema,
|
"--json-schema", Schema,
|
||||||
"--output-format", "json",
|
"--output-format", "json",
|
||||||
prompt,
|
|
||||||
}
|
}
|
||||||
|
if strings.HasPrefix(req.Model, "claude-") {
|
||||||
|
args = append(args, "--model", req.Model)
|
||||||
|
}
|
||||||
|
args = append(args, prompt)
|
||||||
|
|
||||||
cmd := exec.CommandContext(ctx, e.cfg.ClaudeBinary, args...)
|
cmd := exec.CommandContext(ctx, e.cfg.ClaudeBinary, args...)
|
||||||
cmd.Env = append(os.Environ(), "LITELLM_API_KEY="+e.cfg.LiteLLMAPIKey)
|
cmd.Env = append(os.Environ(), "LITELLM_API_KEY="+e.cfg.LiteLLMAPIKey)
|
||||||
|
|||||||
@@ -75,3 +75,58 @@ func TestExecutorTimesOut(t *testing.T) {
|
|||||||
_, err := ex.Run(context.Background(), iexec.Request{TaskPrompt: "slow"})
|
_, err := ex.Run(context.Background(), iexec.Request{TaskPrompt: "slow"})
|
||||||
assert.ErrorContains(t, err, "timeout")
|
assert.ErrorContains(t, err, "timeout")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestExecutorPassesModelFlagForCloudModel(t *testing.T) {
|
||||||
|
// The script captures its args to a temp file so we can assert --model was passed.
|
||||||
|
argsFile := filepath.Join(t.TempDir(), "args.txt")
|
||||||
|
envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"review","skill":"review","file_path":"","runner_output":"","verified":true,"model_used":"claude-sonnet-4-6","message":"ok"}}`
|
||||||
|
|
||||||
|
dir := t.TempDir()
|
||||||
|
script := filepath.Join(dir, "claude")
|
||||||
|
content := "#!/bin/sh\necho \"$@\" > " + argsFile + "\necho '" + envelope + "'\n"
|
||||||
|
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
||||||
|
|
||||||
|
ex := iexec.New(iexec.Config{
|
||||||
|
ClaudeBinary: script,
|
||||||
|
SystemPrompt: "sys",
|
||||||
|
Timeout: 5 * time.Second,
|
||||||
|
})
|
||||||
|
|
||||||
|
_, err := ex.Run(context.Background(), iexec.Request{
|
||||||
|
SkillPrompt: "review rules",
|
||||||
|
TaskPrompt: "do review",
|
||||||
|
Model: "claude-sonnet-4-6",
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
argsData, err := os.ReadFile(argsFile)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, string(argsData), "--model claude-sonnet-4-6")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExecutorSkipsModelFlagForLocalModel(t *testing.T) {
|
||||||
|
argsFile := filepath.Join(t.TempDir(), "args.txt")
|
||||||
|
envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"review","skill":"review","file_path":"","runner_output":"","verified":true,"model_used":"ollama/devstral","message":"ok"}}`
|
||||||
|
|
||||||
|
dir := t.TempDir()
|
||||||
|
script := filepath.Join(dir, "claude")
|
||||||
|
content := "#!/bin/sh\necho \"$@\" > " + argsFile + "\necho '" + envelope + "'\n"
|
||||||
|
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
||||||
|
|
||||||
|
ex := iexec.New(iexec.Config{
|
||||||
|
ClaudeBinary: script,
|
||||||
|
SystemPrompt: "sys",
|
||||||
|
Timeout: 5 * time.Second,
|
||||||
|
})
|
||||||
|
|
||||||
|
_, err := ex.Run(context.Background(), iexec.Request{
|
||||||
|
SkillPrompt: "review rules",
|
||||||
|
TaskPrompt: "do review",
|
||||||
|
Model: "ollama/devstral",
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
argsData, err := os.ReadFile(argsFile)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.NotContains(t, string(argsData), "--model")
|
||||||
|
}
|
||||||
|
|||||||
103
internal/exec/litellm.go
Normal file
103
internal/exec/litellm.go
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
package exec
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// LiteLLMExecutor calls a LiteLLM-compatible /v1/chat/completions endpoint.
|
||||||
|
// Local models are expected to return a JSON object matching the Result schema
|
||||||
|
// as their response content — no envelope.
|
||||||
|
type LiteLLMExecutor struct {
|
||||||
|
baseURL string
|
||||||
|
apiKey string
|
||||||
|
httpClient *http.Client
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewLiteLLM creates a LiteLLMExecutor.
|
||||||
|
// timeout applies to the full HTTP round-trip per call.
|
||||||
|
func NewLiteLLM(baseURL, apiKey string, timeout time.Duration) *LiteLLMExecutor {
|
||||||
|
return &LiteLLMExecutor{
|
||||||
|
baseURL: baseURL,
|
||||||
|
apiKey: apiKey,
|
||||||
|
httpClient: &http.Client{Timeout: timeout},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type litellmMessage struct {
|
||||||
|
Role string `json:"role"`
|
||||||
|
Content string `json:"content"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type litellmRequest struct {
|
||||||
|
Model string `json:"model"`
|
||||||
|
Messages []litellmMessage `json:"messages"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type litellmChoice struct {
|
||||||
|
Message litellmMessage `json:"message"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type litellmResponse struct {
|
||||||
|
Choices []litellmChoice `json:"choices"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run dispatches req to the LiteLLM server and parses the Result from the
|
||||||
|
// assistant message content. Returns an error on network failure, non-200
|
||||||
|
// status, or unparseable/invalid JSON — all of which the Orchestrator treats
|
||||||
|
// as automatic escalation triggers.
|
||||||
|
func (e *LiteLLMExecutor) Run(ctx context.Context, req Request) (Result, error) {
|
||||||
|
body := litellmRequest{
|
||||||
|
Model: req.Model,
|
||||||
|
Messages: []litellmMessage{
|
||||||
|
{Role: "system", Content: req.SkillPrompt},
|
||||||
|
{Role: "user", Content: req.TaskPrompt},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
bodyBytes, err := json.Marshal(body)
|
||||||
|
if err != nil {
|
||||||
|
return Result{}, fmt.Errorf("litellm: marshal request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, e.baseURL+"/v1/chat/completions", bytes.NewReader(bodyBytes))
|
||||||
|
if err != nil {
|
||||||
|
return Result{}, fmt.Errorf("litellm: create request: %w", err)
|
||||||
|
}
|
||||||
|
httpReq.Header.Set("Content-Type", "application/json")
|
||||||
|
if e.apiKey != "" {
|
||||||
|
httpReq.Header.Set("Authorization", "Bearer "+e.apiKey)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := e.httpClient.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return Result{}, fmt.Errorf("litellm: request failed: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close() //nolint:errcheck
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return Result{}, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
var chatResp litellmResponse
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil {
|
||||||
|
return Result{}, fmt.Errorf("litellm: decode response: %w", err)
|
||||||
|
}
|
||||||
|
if len(chatResp.Choices) == 0 {
|
||||||
|
return Result{}, fmt.Errorf("litellm: no choices in response")
|
||||||
|
}
|
||||||
|
|
||||||
|
content := chatResp.Choices[0].Message.Content
|
||||||
|
var result Result
|
||||||
|
if err := json.Unmarshal([]byte(content), &result); err != nil {
|
||||||
|
return Result{}, fmt.Errorf("litellm: parse result JSON: %w — content: %s", err, content)
|
||||||
|
}
|
||||||
|
if err := result.Validate(); err != nil {
|
||||||
|
return Result{}, fmt.Errorf("litellm: invalid result: %w", err)
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
112
internal/exec/litellm_test.go
Normal file
112
internal/exec/litellm_test.go
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
package exec_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func validLiteLLMResult() iexec.Result {
|
||||||
|
return iexec.Result{
|
||||||
|
Status: "pass",
|
||||||
|
Phase: "review",
|
||||||
|
Skill: "review",
|
||||||
|
ModelUsed: "ollama/devstral",
|
||||||
|
Message: "looks good",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func chatResponseFor(t *testing.T, result iexec.Result) []byte {
|
||||||
|
t.Helper()
|
||||||
|
content, err := json.Marshal(result)
|
||||||
|
require.NoError(t, err)
|
||||||
|
resp := map[string]any{
|
||||||
|
"choices": []map[string]any{
|
||||||
|
{"message": map[string]any{"role": "assistant", "content": string(content)}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
data, err := json.Marshal(resp)
|
||||||
|
require.NoError(t, err)
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLiteLLMParsesValidResult(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
assert.Equal(t, "/v1/chat/completions", r.URL.Path)
|
||||||
|
assert.Equal(t, "application/json", r.Header.Get("Content-Type"))
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write(chatResponseFor(t, validLiteLLMResult()))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||||
|
result, err := ex.Run(context.Background(), iexec.Request{
|
||||||
|
SkillPrompt: "review rules",
|
||||||
|
TaskPrompt: "review the code",
|
||||||
|
Model: "ollama/devstral",
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "pass", result.Status)
|
||||||
|
assert.Equal(t, "review", result.Skill)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLiteLLMSendsAuthHeader(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
assert.Equal(t, "Bearer secret", r.Header.Get("Authorization"))
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write(chatResponseFor(t, validLiteLLMResult()))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
ex := iexec.NewLiteLLM(srv.URL, "secret", 5*time.Second)
|
||||||
|
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t", SkillPrompt: "s"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLiteLLMErrorOnNonOKStatus(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusServiceUnavailable)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||||
|
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"})
|
||||||
|
assert.ErrorContains(t, err, "503")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLiteLLMErrorOnUnparsableJSON(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
resp := map[string]any{
|
||||||
|
"choices": []map[string]any{
|
||||||
|
{"message": map[string]any{"role": "assistant", "content": "not json at all"}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
data, _ := json.Marshal(resp)
|
||||||
|
_, _ = w.Write(data)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||||
|
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"})
|
||||||
|
assert.Error(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLiteLLMRespectsContextCancellation(t *testing.T) {
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
cancel() // Cancel immediately
|
||||||
|
|
||||||
|
ex := iexec.NewLiteLLM("http://invalid.example.com", "", 1*time.Second)
|
||||||
|
_, err := ex.Run(ctx, iexec.Request{Model: "x", TaskPrompt: "t"})
|
||||||
|
assert.Error(t, err)
|
||||||
|
}
|
||||||
197
internal/exec/orchestrator.go
Normal file
197
internal/exec/orchestrator.go
Normal file
@@ -0,0 +1,197 @@
|
|||||||
|
package exec
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ChainEntry is one tier in an escalation chain.
|
||||||
|
type ChainEntry struct {
|
||||||
|
Model string // e.g. "ollama/phi4", "claude-sonnet-4-6"
|
||||||
|
Tier string // "local" | "subagent" | "managed"
|
||||||
|
IsCloud bool // true for claude-* models; skips verifier call
|
||||||
|
}
|
||||||
|
|
||||||
|
// EntryFor builds a ChainEntry from a model name string.
|
||||||
|
func EntryFor(model string) ChainEntry {
|
||||||
|
cloud := strings.HasPrefix(model, "claude-")
|
||||||
|
tier := "local"
|
||||||
|
if cloud {
|
||||||
|
tier = "subagent"
|
||||||
|
}
|
||||||
|
return ChainEntry{Model: model, Tier: tier, IsCloud: cloud}
|
||||||
|
}
|
||||||
|
|
||||||
|
// AttemptRecord captures the outcome of one tier attempt for session logging.
|
||||||
|
type AttemptRecord struct {
|
||||||
|
Model string
|
||||||
|
Tier string
|
||||||
|
DurationMs int64
|
||||||
|
WarmStart bool
|
||||||
|
Verdict string // "accept" | "escalate" | "error"
|
||||||
|
Feedback string
|
||||||
|
}
|
||||||
|
|
||||||
|
// VerifierFn is the interface the orchestrator uses to verify local output.
|
||||||
|
type VerifierFn interface {
|
||||||
|
Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExecutorRunFn is the signature of Executor.Run and LiteLLMExecutor.Run.
|
||||||
|
type ExecutorRunFn func(ctx context.Context, req Request) (Result, error)
|
||||||
|
|
||||||
|
// Orchestrator walks an escalation chain, delegating generation and verification.
|
||||||
|
// It implements the ExecutorFn shape expected by skill handlers.
|
||||||
|
type Orchestrator struct {
|
||||||
|
chain []ChainEntry
|
||||||
|
localRun ExecutorRunFn // for local (non-cloud) tiers; may be nil
|
||||||
|
cloudRun ExecutorRunFn // for cloud tiers; may be nil
|
||||||
|
verifier VerifierFn
|
||||||
|
llamaSwapURL string
|
||||||
|
attempts *[]AttemptRecord
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewOrchestrator creates an Orchestrator.
|
||||||
|
// attempts is a pointer to a slice that will be appended to on each tier attempt.
|
||||||
|
// Pass nil for localRun or cloudRun if no tiers of that type exist in the chain.
|
||||||
|
func NewOrchestrator(
|
||||||
|
chain []ChainEntry,
|
||||||
|
localRun ExecutorRunFn,
|
||||||
|
cloudRun ExecutorRunFn,
|
||||||
|
verifier VerifierFn,
|
||||||
|
llamaSwapURL string,
|
||||||
|
attempts *[]AttemptRecord,
|
||||||
|
) *Orchestrator {
|
||||||
|
return &Orchestrator{
|
||||||
|
chain: chain,
|
||||||
|
localRun: localRun,
|
||||||
|
cloudRun: cloudRun,
|
||||||
|
verifier: verifier,
|
||||||
|
llamaSwapURL: llamaSwapURL,
|
||||||
|
attempts: attempts,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run walks the escalation chain and returns the first accepted result.
|
||||||
|
// Satisfies the ExecutorFn signature: func(context.Context, Request) (Result, error).
|
||||||
|
func (o *Orchestrator) Run(ctx context.Context, req Request) (Result, error) {
|
||||||
|
taskPrompt := req.TaskPrompt
|
||||||
|
|
||||||
|
for _, entry := range o.chain {
|
||||||
|
warm := o.probeWarm(entry.Model)
|
||||||
|
start := time.Now()
|
||||||
|
|
||||||
|
tierReq := req
|
||||||
|
tierReq.Model = entry.Model
|
||||||
|
tierReq.TaskPrompt = taskPrompt
|
||||||
|
|
||||||
|
if entry.IsCloud {
|
||||||
|
result, genErr := o.cloudRun(ctx, tierReq)
|
||||||
|
dur := time.Since(start).Milliseconds()
|
||||||
|
verdict := "accept"
|
||||||
|
if genErr != nil {
|
||||||
|
verdict = "error"
|
||||||
|
}
|
||||||
|
o.appendAttempt(AttemptRecord{
|
||||||
|
Model: entry.Model,
|
||||||
|
Tier: entry.Tier,
|
||||||
|
DurationMs: dur,
|
||||||
|
WarmStart: warm,
|
||||||
|
Verdict: verdict,
|
||||||
|
})
|
||||||
|
if genErr == nil {
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Local tier.
|
||||||
|
result, genErr := o.localRun(ctx, tierReq)
|
||||||
|
dur := time.Since(start).Milliseconds()
|
||||||
|
|
||||||
|
if genErr != nil {
|
||||||
|
o.appendAttempt(AttemptRecord{
|
||||||
|
Model: entry.Model,
|
||||||
|
Tier: entry.Tier,
|
||||||
|
DurationMs: dur,
|
||||||
|
WarmStart: warm,
|
||||||
|
Verdict: "error",
|
||||||
|
Feedback: genErr.Error(),
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
verdict, verErr := o.verifier.Verify(ctx, req.SkillPrompt, taskPrompt, result)
|
||||||
|
if verErr != nil {
|
||||||
|
// Treat verifier failure as escalate (safe default).
|
||||||
|
o.appendAttempt(AttemptRecord{
|
||||||
|
Model: entry.Model,
|
||||||
|
Tier: entry.Tier,
|
||||||
|
DurationMs: dur,
|
||||||
|
WarmStart: warm,
|
||||||
|
Verdict: "escalate",
|
||||||
|
Feedback: "verifier error: " + verErr.Error(),
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if verdict.Accept {
|
||||||
|
o.appendAttempt(AttemptRecord{
|
||||||
|
Model: entry.Model,
|
||||||
|
Tier: entry.Tier,
|
||||||
|
DurationMs: dur,
|
||||||
|
WarmStart: warm,
|
||||||
|
Verdict: "accept",
|
||||||
|
})
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
o.appendAttempt(AttemptRecord{
|
||||||
|
Model: entry.Model,
|
||||||
|
Tier: entry.Tier,
|
||||||
|
DurationMs: dur,
|
||||||
|
WarmStart: warm,
|
||||||
|
Verdict: "escalate",
|
||||||
|
Feedback: verdict.Feedback,
|
||||||
|
})
|
||||||
|
// Inject verifier feedback into the next tier's task prompt.
|
||||||
|
taskPrompt = taskPrompt + "\n\nPrior attempt feedback: " + verdict.Feedback
|
||||||
|
}
|
||||||
|
|
||||||
|
return Result{}, fmt.Errorf("all tiers exhausted after %d attempt(s)", len(o.chain))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *Orchestrator) appendAttempt(rec AttemptRecord) {
|
||||||
|
if o.attempts != nil {
|
||||||
|
*o.attempts = append(*o.attempts, rec)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// probeWarm checks whether the model is currently loaded in llama-swap.
|
||||||
|
// Returns false on any error or if llamaSwapURL is empty.
|
||||||
|
func (o *Orchestrator) probeWarm(model string) bool {
|
||||||
|
if o.llamaSwapURL == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, o.llamaSwapURL+"/v1/models", nil)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
resp, err := http.DefaultClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
defer resp.Body.Close() //nolint:errcheck
|
||||||
|
body, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return strings.Contains(string(body), model)
|
||||||
|
}
|
||||||
151
internal/exec/orchestrator_test.go
Normal file
151
internal/exec/orchestrator_test.go
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
package exec_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
// stubRunFn returns preset results sequentially.
|
||||||
|
type stubRunFn struct {
|
||||||
|
calls []stubCall
|
||||||
|
callIdx int
|
||||||
|
}
|
||||||
|
|
||||||
|
type stubCall struct {
|
||||||
|
result iexec.Result
|
||||||
|
err error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *stubRunFn) Run(_ context.Context, _ iexec.Request) (iexec.Result, error) {
|
||||||
|
if s.callIdx >= len(s.calls) {
|
||||||
|
return iexec.Result{}, errors.New("unexpected call")
|
||||||
|
}
|
||||||
|
c := s.calls[s.callIdx]
|
||||||
|
s.callIdx++
|
||||||
|
return c.result, c.err
|
||||||
|
}
|
||||||
|
|
||||||
|
// stubVerifier returns preset verdicts sequentially.
|
||||||
|
type stubVerifier struct {
|
||||||
|
verdicts []iexec.Verdict
|
||||||
|
idx int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *stubVerifier) Verify(_ context.Context, _, _ string, _ iexec.Result) (iexec.Verdict, error) {
|
||||||
|
if s.idx >= len(s.verdicts) {
|
||||||
|
return iexec.Verdict{}, errors.New("unexpected verify call")
|
||||||
|
}
|
||||||
|
v := s.verdicts[s.idx]
|
||||||
|
s.idx++
|
||||||
|
return v, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func okResult(skill string) iexec.Result {
|
||||||
|
return iexec.Result{Status: "pass", Phase: "review", Skill: skill, Message: "ok", ModelUsed: "m"}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOrchestratorSingleLocalAccept(t *testing.T) {
|
||||||
|
local := &stubRunFn{calls: []stubCall{{result: okResult("review")}}}
|
||||||
|
verifier := &stubVerifier{verdicts: []iexec.Verdict{{Accept: true}}}
|
||||||
|
|
||||||
|
var attempts []iexec.AttemptRecord
|
||||||
|
orch := iexec.NewOrchestrator(
|
||||||
|
[]iexec.ChainEntry{{Model: "ollama/devstral", Tier: "local", IsCloud: false}},
|
||||||
|
local.Run, nil, verifier, "", &attempts,
|
||||||
|
)
|
||||||
|
|
||||||
|
result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "pass", result.Status)
|
||||||
|
require.Len(t, attempts, 1)
|
||||||
|
assert.Equal(t, "local", attempts[0].Tier)
|
||||||
|
assert.Equal(t, "accept", attempts[0].Verdict)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOrchestratorEscalatesOnVerifierReject(t *testing.T) {
|
||||||
|
local := &stubRunFn{calls: []stubCall{
|
||||||
|
{result: iexec.Result{Status: "fail", Phase: "review", Skill: "review", Message: "weak"}},
|
||||||
|
{result: okResult("review")},
|
||||||
|
}}
|
||||||
|
verifier := &stubVerifier{verdicts: []iexec.Verdict{
|
||||||
|
{Accept: false, Feedback: "missing line refs"},
|
||||||
|
{Accept: true},
|
||||||
|
}}
|
||||||
|
|
||||||
|
var attempts []iexec.AttemptRecord
|
||||||
|
orch := iexec.NewOrchestrator(
|
||||||
|
[]iexec.ChainEntry{
|
||||||
|
{Model: "ollama/devstral", Tier: "local", IsCloud: false},
|
||||||
|
{Model: "ollama/gemma4", Tier: "local", IsCloud: false},
|
||||||
|
},
|
||||||
|
local.Run, nil, verifier, "", &attempts,
|
||||||
|
)
|
||||||
|
|
||||||
|
result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "pass", result.Status)
|
||||||
|
require.Len(t, attempts, 2)
|
||||||
|
assert.Equal(t, "escalate", attempts[0].Verdict)
|
||||||
|
assert.Equal(t, "missing line refs", attempts[0].Feedback)
|
||||||
|
assert.Equal(t, "accept", attempts[1].Verdict)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOrchestratorEscalatesOnLocalError(t *testing.T) {
|
||||||
|
local := &stubRunFn{calls: []stubCall{
|
||||||
|
{err: errors.New("network failure")},
|
||||||
|
{result: okResult("review")},
|
||||||
|
}}
|
||||||
|
verifier := &stubVerifier{verdicts: []iexec.Verdict{{Accept: true}}}
|
||||||
|
|
||||||
|
var attempts []iexec.AttemptRecord
|
||||||
|
orch := iexec.NewOrchestrator(
|
||||||
|
[]iexec.ChainEntry{
|
||||||
|
{Model: "ollama/devstral", Tier: "local", IsCloud: false},
|
||||||
|
{Model: "ollama/gemma4", Tier: "local", IsCloud: false},
|
||||||
|
},
|
||||||
|
local.Run, nil, verifier, "", &attempts,
|
||||||
|
)
|
||||||
|
|
||||||
|
_, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, attempts, 2)
|
||||||
|
assert.Equal(t, "error", attempts[0].Verdict)
|
||||||
|
assert.Equal(t, "accept", attempts[1].Verdict)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOrchestratorCloudTierSelfCertifies(t *testing.T) {
|
||||||
|
cloud := &stubRunFn{calls: []stubCall{{result: okResult("review")}}}
|
||||||
|
verifier := &stubVerifier{} // no verdicts — must not be called
|
||||||
|
|
||||||
|
var attempts []iexec.AttemptRecord
|
||||||
|
orch := iexec.NewOrchestrator(
|
||||||
|
[]iexec.ChainEntry{{Model: "claude-sonnet-4-6", Tier: "subagent", IsCloud: true}},
|
||||||
|
nil, cloud.Run, verifier, "", &attempts,
|
||||||
|
)
|
||||||
|
|
||||||
|
result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "pass", result.Status)
|
||||||
|
require.Len(t, attempts, 1)
|
||||||
|
assert.Equal(t, "subagent", attempts[0].Tier)
|
||||||
|
assert.Equal(t, "accept", attempts[0].Verdict)
|
||||||
|
assert.Equal(t, 0, verifier.idx) // verifier never called
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOrchestratorAllTiersExhausted(t *testing.T) {
|
||||||
|
local := &stubRunFn{calls: []stubCall{{err: errors.New("unavailable")}}}
|
||||||
|
|
||||||
|
var attempts []iexec.AttemptRecord
|
||||||
|
orch := iexec.NewOrchestrator(
|
||||||
|
[]iexec.ChainEntry{{Model: "ollama/devstral", Tier: "local", IsCloud: false}},
|
||||||
|
local.Run, nil, &stubVerifier{}, "", &attempts,
|
||||||
|
)
|
||||||
|
|
||||||
|
_, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||||
|
assert.ErrorContains(t, err, "all tiers exhausted")
|
||||||
|
}
|
||||||
99
internal/exec/verifier.go
Normal file
99
internal/exec/verifier.go
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
package exec
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Verdict is the output of a Claude verification call.
|
||||||
|
type Verdict struct {
|
||||||
|
Accept bool `json:"accept"`
|
||||||
|
Feedback string `json:"feedback"` // empty when Accept is true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verifier runs a focused Claude call to judge local model output.
|
||||||
|
type Verifier struct {
|
||||||
|
claudeBinary string
|
||||||
|
model string
|
||||||
|
timeout time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewVerifier creates a Verifier that calls claude with the given binary path and model.
|
||||||
|
// Empty claudeBinary defaults to "claude". Zero timeout defaults to 30s.
|
||||||
|
func NewVerifier(claudeBinary, model string, timeout time.Duration) *Verifier {
|
||||||
|
if claudeBinary == "" {
|
||||||
|
claudeBinary = "claude"
|
||||||
|
}
|
||||||
|
if timeout == 0 {
|
||||||
|
timeout = 30 * time.Second
|
||||||
|
}
|
||||||
|
return &Verifier{
|
||||||
|
claudeBinary: claudeBinary,
|
||||||
|
model: model,
|
||||||
|
timeout: timeout,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify asks Claude whether output satisfies the skill discipline's iron laws.
|
||||||
|
// Returns Verdict{Accept: true} to accept or Verdict{Accept: false, Feedback: "..."}
|
||||||
|
// to escalate. Returns an error on subprocess failure or unparseable response.
|
||||||
|
func (v *Verifier) Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error) {
|
||||||
|
ctx, cancel := context.WithTimeout(ctx, v.timeout)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
outputJSON, err := json.Marshal(output)
|
||||||
|
if err != nil {
|
||||||
|
return Verdict{}, fmt.Errorf("verifier: marshal output: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
prompt := fmt.Sprintf(`You are a quality verifier for an AI supervisor system.
|
||||||
|
|
||||||
|
Given the skill discipline, the original task, and the generated output, decide whether the output satisfies the discipline's iron laws and output contract.
|
||||||
|
|
||||||
|
Reply with JSON only — no other text:
|
||||||
|
{"accept": true, "feedback": ""}
|
||||||
|
or
|
||||||
|
{"accept": false, "feedback": "<one sentence reason>"}
|
||||||
|
|
||||||
|
## Skill discipline
|
||||||
|
%s
|
||||||
|
|
||||||
|
## Original task
|
||||||
|
%s
|
||||||
|
|
||||||
|
## Generated output
|
||||||
|
%s`, skillPrompt, taskPrompt, string(outputJSON))
|
||||||
|
|
||||||
|
args := []string{
|
||||||
|
"--print",
|
||||||
|
"--permission-mode", "bypassPermissions",
|
||||||
|
}
|
||||||
|
if v.model != "" {
|
||||||
|
args = append(args, "--model", v.model)
|
||||||
|
}
|
||||||
|
args = append(args, prompt)
|
||||||
|
|
||||||
|
cmd := exec.CommandContext(ctx, v.claudeBinary, args...)
|
||||||
|
cmd.Env = os.Environ()
|
||||||
|
var stdout, stderr bytes.Buffer
|
||||||
|
cmd.Stdout = &stdout
|
||||||
|
cmd.Stderr = &stderr
|
||||||
|
|
||||||
|
if err := cmd.Run(); err != nil {
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
return Verdict{}, fmt.Errorf("verifier: timeout after %s", v.timeout)
|
||||||
|
}
|
||||||
|
return Verdict{}, fmt.Errorf("verifier: claude exited with error: %w — stderr: %s", err, stderr.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
var verdict Verdict
|
||||||
|
if err := json.Unmarshal(bytes.TrimSpace(stdout.Bytes()), &verdict); err != nil {
|
||||||
|
return Verdict{}, fmt.Errorf("verifier: parse verdict JSON: %w — raw: %s", err, stdout.String())
|
||||||
|
}
|
||||||
|
return verdict, nil
|
||||||
|
}
|
||||||
74
internal/exec/verifier_test.go
Normal file
74
internal/exec/verifier_test.go
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
package exec_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func fakeVerifierClaude(t *testing.T, verdict iexec.Verdict) string {
|
||||||
|
t.Helper()
|
||||||
|
data, err := json.Marshal(verdict)
|
||||||
|
require.NoError(t, err)
|
||||||
|
dir := t.TempDir()
|
||||||
|
script := filepath.Join(dir, "claude")
|
||||||
|
content := fmt.Sprintf("#!/bin/sh\necho '%s'\n", string(data))
|
||||||
|
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
||||||
|
return script
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerifierAccepts(t *testing.T) {
|
||||||
|
claude := fakeVerifierClaude(t, iexec.Verdict{Accept: true, Feedback: ""})
|
||||||
|
v := iexec.NewVerifier(claude, "claude-sonnet-4-6", 5*time.Second)
|
||||||
|
|
||||||
|
verdict, err := v.Verify(context.Background(), "skill rules", "do the task", iexec.Result{
|
||||||
|
Status: "pass", Phase: "review", Skill: "review", Message: "ok",
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.True(t, verdict.Accept)
|
||||||
|
assert.Empty(t, verdict.Feedback)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerifierEscalates(t *testing.T) {
|
||||||
|
claude := fakeVerifierClaude(t, iexec.Verdict{Accept: false, Feedback: "missing line references"})
|
||||||
|
v := iexec.NewVerifier(claude, "claude-sonnet-4-6", 5*time.Second)
|
||||||
|
|
||||||
|
verdict, err := v.Verify(context.Background(), "skill rules", "do the task", iexec.Result{
|
||||||
|
Status: "pass", Phase: "review", Skill: "review", Message: "incomplete",
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.False(t, verdict.Accept)
|
||||||
|
assert.Equal(t, "missing line references", verdict.Feedback)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerifierErrorOnUnparsableOutput(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
script := filepath.Join(dir, "claude")
|
||||||
|
require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\necho 'not json'\n"), 0755))
|
||||||
|
|
||||||
|
v := iexec.NewVerifier(script, "claude-sonnet-4-6", 5*time.Second)
|
||||||
|
_, err := v.Verify(context.Background(), "rules", "task", iexec.Result{
|
||||||
|
Status: "pass", Phase: "review", Skill: "review", Message: "ok",
|
||||||
|
})
|
||||||
|
assert.Error(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerifierErrorOnNonZeroExit(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
script := filepath.Join(dir, "claude")
|
||||||
|
require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\nexit 1\n"), 0755))
|
||||||
|
|
||||||
|
v := iexec.NewVerifier(script, "claude-sonnet-4-6", 5*time.Second)
|
||||||
|
_, err := v.Verify(context.Background(), "rules", "task", iexec.Result{
|
||||||
|
Status: "pass", Phase: "review", Skill: "review", Message: "ok",
|
||||||
|
})
|
||||||
|
assert.Error(t, err)
|
||||||
|
}
|
||||||
@@ -32,9 +32,14 @@ type Entry struct {
|
|||||||
type Attempt struct {
|
type Attempt struct {
|
||||||
Attempt int `json:"attempt"`
|
Attempt int `json:"attempt"`
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
|
Tier string `json:"tier"` // local | subagent | managed
|
||||||
|
DurationMs int64 `json:"duration_ms"`
|
||||||
|
WarmStart bool `json:"warm_start"` // model already loaded in llama-swap
|
||||||
|
Verified bool `json:"verified"`
|
||||||
|
Verdict string `json:"verdict,omitempty"` // accept | escalate | error
|
||||||
|
Feedback string `json:"feedback,omitempty"` // verifier feedback on escalation
|
||||||
OutputSummary string `json:"output_summary,omitempty"`
|
OutputSummary string `json:"output_summary,omitempty"`
|
||||||
RunnerOutput string `json:"runner_output,omitempty"`
|
RunnerOutput string `json:"runner_output,omitempty"`
|
||||||
Verified bool `json:"verified"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Append writes entry as a single JSON line to sessionsDir/{sessionID}.jsonl.
|
// Append writes entry as a single JSON line to sessionsDir/{sessionID}.jsonl.
|
||||||
|
|||||||
@@ -61,3 +61,22 @@ func TestRead_EmptyWhenNoFile(t *testing.T) {
|
|||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.Empty(t, entries)
|
assert.Empty(t, entries)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestAttemptRoundTrip(t *testing.T) {
|
||||||
|
a := session.Attempt{
|
||||||
|
Attempt: 1,
|
||||||
|
Model: "ollama/devstral",
|
||||||
|
Tier: "local",
|
||||||
|
DurationMs: 4200,
|
||||||
|
WarmStart: true,
|
||||||
|
Verified: false,
|
||||||
|
Verdict: "escalate",
|
||||||
|
Feedback: "missing line references",
|
||||||
|
}
|
||||||
|
data, err := json.Marshal(a)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
var got session.Attempt
|
||||||
|
require.NoError(t, json.Unmarshal(data, &got))
|
||||||
|
assert.Equal(t, a, got)
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user