refactor(routing): rename local/claude to fast/thinking model pair
The routing decision is about reasoning capacity, not cost or provider. The fast model (koala/qwen35-9b-fast) handles high-pass-rate calls; the thinking model (iguana/gemma4-26b) handles low-pass-rate calls. This removes the implicit Anthropic dependency from the routing pod — both models go through LiteLLM.

Renames: HYPERGUILD_LOCAL_MODEL → HYPERGUILD_FAST_MODEL, HYPERGUILD_CLAUDE_MODEL → HYPERGUILD_THINKING_MODEL, Router.LocalModel → FastModel, Router.ClaudeModel → ThinkingModel, log decision "claude_fallback" → "thinking_fallback".

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -13,7 +13,7 @@ import (
|
||||
type LogEntry struct {
|
||||
SessionID string
|
||||
Skill string // the original skill the call routed (e.g., "review")
|
||||
Decision string // "local" or "claude" or "claude_fallback"
|
||||
Decision string // "local" or "thinking" or "thinking_fallback"
|
||||
Message string // free-form, e.g. "model=qwen35, pass_rate=0.94"
|
||||
ProjectRoot string
|
||||
DurationMs int64
|
||||
|
||||
@@ -24,8 +24,8 @@ type Router struct {
|
||||
Fetcher *Fetcher
|
||||
Logger *Logger
|
||||
Policy Policy
|
||||
LocalModel string
|
||||
ClaudeModel string
|
||||
FastModel string
|
||||
ThinkingModel string
|
||||
Complete CompleteFunc
|
||||
}
|
||||
|
||||
@@ -40,9 +40,9 @@ func (r *Router) Run(ctx context.Context, in RunInput) (string, int64, error) {
|
||||
hash := CanonicalHash(in.System, in.User)
|
||||
decision := r.Policy.Decide(pr, hash)
|
||||
|
||||
model := r.ClaudeModel
|
||||
model := r.ThinkingModel
|
||||
if decision == DecideLocal {
|
||||
model = r.LocalModel
|
||||
model = r.FastModel
|
||||
}
|
||||
|
||||
out, ms, err := r.Complete(ctx, model, in.System, in.User)
|
||||
@@ -59,13 +59,13 @@ func (r *Router) Run(ctx context.Context, in RunInput) (string, int64, error) {
|
||||
}
|
||||
|
||||
if err != nil && decision == DecideLocal {
|
||||
slog.Warn("router: local failed, falling open to claude", "skill", in.Skill, "err", err)
|
||||
out, ms, err = r.Complete(ctx, r.ClaudeModel, in.System, in.User)
|
||||
slog.Warn("router: fast failed, falling open to thinking model", "skill", in.Skill, "err", err)
|
||||
out, ms, err = r.Complete(ctx, r.ThinkingModel, in.System, in.User)
|
||||
if lerr := r.Logger.LogDecision(ctx, LogEntry{
|
||||
SessionID: in.SessionID,
|
||||
Skill: in.Skill,
|
||||
Decision: "claude_fallback",
|
||||
Message: fmt.Sprintf("model=%s, after-local-error", r.ClaudeModel),
|
||||
Decision: "thinking_fallback",
|
||||
Message: fmt.Sprintf("model=%s, after-fast-error", r.ThinkingModel),
|
||||
ProjectRoot: in.ProjectRoot,
|
||||
DurationMs: ms,
|
||||
Failed: err != nil,
|
||||
|
||||
@@ -49,12 +49,12 @@ func newRouter(t *testing.T, llm *fakeLLM, passRate float64) (*routing.Router, *
|
||||
t.Cleanup(brain.Close)
|
||||
|
||||
r := &routing.Router{
|
||||
Fetcher: routing.NewFetcher(brain.URL, "7d", time.Minute),
|
||||
Logger: routing.NewLogger(brain.URL),
|
||||
Policy: routing.Policy{Floor: 0.9, Ceil: 0.7},
|
||||
LocalModel: "qwen35",
|
||||
ClaudeModel: "claude-sonnet-4-6",
|
||||
Complete: llm.Complete,
|
||||
Fetcher: routing.NewFetcher(brain.URL, "7d", time.Minute),
|
||||
Logger: routing.NewLogger(brain.URL),
|
||||
Policy: routing.Policy{Floor: 0.9, Ceil: 0.7},
|
||||
FastModel: "koala/qwen35-9b-fast",
|
||||
ThinkingModel: "iguana/gemma4-26b",
|
||||
Complete: llm.Complete,
|
||||
}
|
||||
return r, brain, brain
|
||||
}
|
||||
@@ -72,10 +72,10 @@ func TestRouterRoutesLocalAtHighPassRate(t *testing.T) {
|
||||
llm.mu.Lock()
|
||||
defer llm.mu.Unlock()
|
||||
require.Len(t, llm.calls, 1)
|
||||
assert.Equal(t, "qwen35", llm.calls[0].Model)
|
||||
assert.Equal(t, "koala/qwen35-9b-fast", llm.calls[0].Model)
|
||||
}
|
||||
|
||||
func TestRouterRoutesClaudeAtLowPassRate(t *testing.T) {
|
||||
func TestRouterRoutesThinkingAtLowPassRate(t *testing.T) {
|
||||
llm := &fakeLLM{resp: "ok"}
|
||||
r, _, _ := newRouter(t, llm, 0.3)
|
||||
|
||||
@@ -87,12 +87,12 @@ func TestRouterRoutesClaudeAtLowPassRate(t *testing.T) {
|
||||
llm.mu.Lock()
|
||||
defer llm.mu.Unlock()
|
||||
require.Len(t, llm.calls, 1)
|
||||
assert.Equal(t, "claude-sonnet-4-6", llm.calls[0].Model)
|
||||
assert.Equal(t, "iguana/gemma4-26b", llm.calls[0].Model)
|
||||
}
|
||||
|
||||
func TestRouterFailsOpenLocalErrorToClaude(t *testing.T) {
|
||||
llm := &fakeLLM{resp: "ok-after-fallback", err: errors.New("local boom"), errOn: "qwen35"}
|
||||
r, _, _ := newRouter(t, llm, 0.95) // would route local
|
||||
func TestRouterFailsOpenFastErrorToThinking(t *testing.T) {
|
||||
llm := &fakeLLM{resp: "ok-after-fallback", err: errors.New("fast boom"), errOn: "koala/qwen35-9b-fast"}
|
||||
r, _, _ := newRouter(t, llm, 0.95) // would route fast
|
||||
|
||||
out, _, err := r.Run(context.Background(), routing.RunInput{
|
||||
Skill: "review", System: "sys", User: "user", SessionID: "s3",
|
||||
@@ -103,12 +103,12 @@ func TestRouterFailsOpenLocalErrorToClaude(t *testing.T) {
|
||||
llm.mu.Lock()
|
||||
defer llm.mu.Unlock()
|
||||
require.Len(t, llm.calls, 2)
|
||||
assert.Equal(t, "qwen35", llm.calls[0].Model)
|
||||
assert.Equal(t, "claude-sonnet-4-6", llm.calls[1].Model)
|
||||
assert.Equal(t, "koala/qwen35-9b-fast", llm.calls[0].Model)
|
||||
assert.Equal(t, "iguana/gemma4-26b", llm.calls[1].Model)
|
||||
}
|
||||
|
||||
func TestRouterDefaultsToLocalWhenBrainUnreachable(t *testing.T) {
|
||||
// Brain returns 500 → fetcher errors → router treats pass rate as nil → local.
|
||||
func TestRouterDefaultsToFastWhenBrainUnreachable(t *testing.T) {
|
||||
// Brain returns 500 → fetcher errors → router treats pass rate as nil → fast.
|
||||
brain := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||
http.Error(w, "down", http.StatusInternalServerError)
|
||||
}))
|
||||
@@ -116,12 +116,12 @@ func TestRouterDefaultsToLocalWhenBrainUnreachable(t *testing.T) {
|
||||
|
||||
llm := &fakeLLM{resp: "ok"}
|
||||
r := &routing.Router{
|
||||
Fetcher: routing.NewFetcher(brain.URL, "7d", time.Minute),
|
||||
Logger: routing.NewLogger(brain.URL),
|
||||
Policy: routing.Policy{Floor: 0.9, Ceil: 0.7},
|
||||
LocalModel: "qwen35",
|
||||
ClaudeModel: "claude-sonnet-4-6",
|
||||
Complete: llm.Complete,
|
||||
Fetcher: routing.NewFetcher(brain.URL, "7d", time.Minute),
|
||||
Logger: routing.NewLogger(brain.URL),
|
||||
Policy: routing.Policy{Floor: 0.9, Ceil: 0.7},
|
||||
FastModel: "koala/qwen35-9b-fast",
|
||||
ThinkingModel: "iguana/gemma4-26b",
|
||||
Complete: llm.Complete,
|
||||
}
|
||||
|
||||
_, _, err := r.Run(context.Background(), routing.RunInput{
|
||||
@@ -132,5 +132,5 @@ func TestRouterDefaultsToLocalWhenBrainUnreachable(t *testing.T) {
|
||||
llm.mu.Lock()
|
||||
defer llm.mu.Unlock()
|
||||
require.Len(t, llm.calls, 1)
|
||||
assert.Equal(t, "qwen35", llm.calls[0].Model)
|
||||
assert.Equal(t, "koala/qwen35-9b-fast", llm.calls[0].Model)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user