package exec import ( "context" "fmt" "io" "net/http" "strings" "time" ) // ChainEntry is one tier in an escalation chain. type ChainEntry struct { Model string // e.g. "ollama/phi4", "claude-sonnet-4-6" Tier string // "local" | "subagent" | "managed" IsCloud bool // true for claude-* models; skips verifier call } // EntryFor builds a ChainEntry from a model name string. func EntryFor(model string) ChainEntry { cloud := strings.HasPrefix(model, "claude-") tier := "local" if cloud { tier = "subagent" } return ChainEntry{Model: model, Tier: tier, IsCloud: cloud} } // AttemptRecord captures the outcome of one tier attempt for session logging. type AttemptRecord struct { Model string Tier string DurationMs int64 WarmStart bool Verdict string // "accept" | "escalate" | "error" Feedback string } // VerifierFn is the interface the orchestrator uses to verify local output. type VerifierFn interface { Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error) } // ExecutorRunFn is the signature of Executor.Run and LiteLLMExecutor.Run. type ExecutorRunFn func(ctx context.Context, req Request) (Result, error) // Orchestrator walks an escalation chain, delegating generation and verification. // It implements the ExecutorFn shape expected by skill handlers. type Orchestrator struct { chain []ChainEntry localRun ExecutorRunFn // for local (non-cloud) tiers; may be nil cloudRun ExecutorRunFn // for cloud tiers; may be nil verifier VerifierFn llamaSwapURL string attempts *[]AttemptRecord } // NewOrchestrator creates an Orchestrator. // attempts is a pointer to a slice that will be appended to on each tier attempt. // Pass nil for localRun or cloudRun if no tiers of that type exist in the chain. func NewOrchestrator( chain []ChainEntry, localRun ExecutorRunFn, cloudRun ExecutorRunFn, verifier VerifierFn, llamaSwapURL string, attempts *[]AttemptRecord, ) *Orchestrator { return &Orchestrator{ chain: chain, localRun: localRun, cloudRun: cloudRun, verifier: verifier, llamaSwapURL: llamaSwapURL, attempts: attempts, } } // Run walks the escalation chain and returns the first accepted result. // Satisfies the ExecutorFn signature: func(context.Context, Request) (Result, error). func (o *Orchestrator) Run(ctx context.Context, req Request) (Result, error) { taskPrompt := req.TaskPrompt for _, entry := range o.chain { warm := o.probeWarm(entry.Model) start := time.Now() tierReq := req tierReq.Model = entry.Model tierReq.TaskPrompt = taskPrompt if entry.IsCloud { result, genErr := o.cloudRun(ctx, tierReq) dur := time.Since(start).Milliseconds() verdict := "accept" if genErr != nil { verdict = "error" } o.appendAttempt(AttemptRecord{ Model: entry.Model, Tier: entry.Tier, DurationMs: dur, WarmStart: warm, Verdict: verdict, }) if genErr == nil { return result, nil } continue } // Local tier. result, genErr := o.localRun(ctx, tierReq) dur := time.Since(start).Milliseconds() if genErr != nil { o.appendAttempt(AttemptRecord{ Model: entry.Model, Tier: entry.Tier, DurationMs: dur, WarmStart: warm, Verdict: "error", Feedback: genErr.Error(), }) continue } verdict, verErr := o.verifier.Verify(ctx, req.SkillPrompt, taskPrompt, result) if verErr != nil { // Treat verifier failure as escalate (safe default). o.appendAttempt(AttemptRecord{ Model: entry.Model, Tier: entry.Tier, DurationMs: dur, WarmStart: warm, Verdict: "escalate", Feedback: "verifier error: " + verErr.Error(), }) continue } if verdict.Accept { o.appendAttempt(AttemptRecord{ Model: entry.Model, Tier: entry.Tier, DurationMs: dur, WarmStart: warm, Verdict: "accept", }) return result, nil } o.appendAttempt(AttemptRecord{ Model: entry.Model, Tier: entry.Tier, DurationMs: dur, WarmStart: warm, Verdict: "escalate", Feedback: verdict.Feedback, }) // Inject verifier feedback into the next tier's task prompt. taskPrompt = taskPrompt + "\n\nPrior attempt feedback: " + verdict.Feedback } return Result{}, fmt.Errorf("all tiers exhausted after %d attempt(s)", len(o.chain)) } func (o *Orchestrator) appendAttempt(rec AttemptRecord) { if o.attempts != nil { *o.attempts = append(*o.attempts, rec) } } // probeWarm checks whether the model is currently loaded in llama-swap. // Returns false on any error or if llamaSwapURL is empty. func (o *Orchestrator) probeWarm(model string) bool { if o.llamaSwapURL == "" { return false } ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodGet, o.llamaSwapURL+"/v1/models", nil) if err != nil { return false } resp, err := http.DefaultClient.Do(req) if err != nil { return false } defer resp.Body.Close() //nolint:errcheck body, err := io.ReadAll(resp.Body) if err != nil { return false } return strings.Contains(string(body), model) }