The routing decision is about reasoning capacity, not cost or provider. Fast model (koala/qwen35-9b-fast) handles high-pass-rate calls; thinking model (iguana/gemma4-26b) handles low-pass-rate calls. Removes the implicit Anthropic dependency from the routing pod — both models go through LiteLLM. Renames: HYPERGUILD_LOCAL_MODEL → HYPERGUILD_FAST_MODEL, HYPERGUILD_CLAUDE_MODEL → HYPERGUILD_THINKING_MODEL, Router.LocalModel → FastModel, Router.ClaudeModel → ThinkingModel, log decision "claude_fallback" → "thinking_fallback". Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
85 lines
2.5 KiB
Go
85 lines
2.5 KiB
Go
package routing
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
)
|
|
|
|
// CompleteFunc is the completion callback shape shared with the skill
// packages' Config. Given a model name plus the system and user prompts, it
// returns the model output, an int64 that Router.Run records as the call's
// duration in milliseconds, and an error.
type CompleteFunc func(
	ctx context.Context,
	model, system, user string,
) (out string, durationMs int64, err error)
|
|
|
|
// RunInput carries the per-call values that Router.Run needs in order to
// dispatch one skill invocation.
type RunInput struct {
	// Skill names the skill being invoked; it keys the pass-rate lookup and
	// is attached to every log entry and warning.
	Skill string
	// System and User are the two prompts. They are hashed together for the
	// routing policy and forwarded unchanged to the completion call.
	System, User string
	// SessionID and ProjectRoot are copied verbatim into each decision log
	// entry.
	SessionID, ProjectRoot string
}
|
|
|
|
// Router composes a pass-rate fetcher, a decision policy, a session logger,
|
|
// and a LiteLLM client. Skill packages receive Router.Run as their CompleteFunc.
|
|
type Router struct {
|
|
Fetcher *Fetcher
|
|
Logger *Logger
|
|
Policy Policy
|
|
FastModel string
|
|
ThinkingModel string
|
|
Complete CompleteFunc
|
|
}
|
|
|
|
// Run executes one skill call: decides local vs claude, calls LiteLLM, logs the
|
|
// decision. On local-side error, falls open by retrying once on the Claude model.
|
|
func (r *Router) Run(ctx context.Context, in RunInput) (string, int64, error) {
|
|
pr, ferr := r.Fetcher.Get(ctx, in.Skill)
|
|
if ferr != nil {
|
|
slog.Warn("router: pass-rate unreachable, defaulting to local", "skill", in.Skill, "err", ferr)
|
|
pr = nil
|
|
}
|
|
hash := CanonicalHash(in.System, in.User)
|
|
decision := r.Policy.Decide(pr, hash)
|
|
|
|
model := r.ThinkingModel
|
|
if decision == DecideLocal {
|
|
model = r.FastModel
|
|
}
|
|
|
|
out, ms, err := r.Complete(ctx, model, in.System, in.User)
|
|
if lerr := r.Logger.LogDecision(ctx, LogEntry{
|
|
SessionID: in.SessionID,
|
|
Skill: in.Skill,
|
|
Decision: decision.String(),
|
|
Message: fmt.Sprintf("model=%s, pass_rate=%s", model, formatPassRate(pr)),
|
|
ProjectRoot: in.ProjectRoot,
|
|
DurationMs: ms,
|
|
Failed: err != nil,
|
|
}); lerr != nil {
|
|
slog.Warn("router: log decision failed", "skill", in.Skill, "err", lerr)
|
|
}
|
|
|
|
if err != nil && decision == DecideLocal {
|
|
slog.Warn("router: fast failed, falling open to thinking model", "skill", in.Skill, "err", err)
|
|
out, ms, err = r.Complete(ctx, r.ThinkingModel, in.System, in.User)
|
|
if lerr := r.Logger.LogDecision(ctx, LogEntry{
|
|
SessionID: in.SessionID,
|
|
Skill: in.Skill,
|
|
Decision: "thinking_fallback",
|
|
Message: fmt.Sprintf("model=%s, after-fast-error", r.ThinkingModel),
|
|
ProjectRoot: in.ProjectRoot,
|
|
DurationMs: ms,
|
|
Failed: err != nil,
|
|
}); lerr != nil {
|
|
slog.Warn("router: log decision failed", "skill", in.Skill, "err", lerr)
|
|
}
|
|
}
|
|
return out, ms, err
|
|
}
|
|
|
|
// formatPassRate renders an optional pass rate for log messages: "null" when
// no rate is available, otherwise the value with two decimal places.
func formatPassRate(pr *float64) string {
	text := "null"
	if pr != nil {
		text = fmt.Sprintf("%.2f", *pr)
	}
	return text
}
|