Files
hyperguild/internal/routing/router_test.go
Mathias Bergqvist 5b207425ed
All checks were successful
CI / Lint / Test / Vet (pull_request) Successful in 10s
CI / Mirror to GitHub (pull_request) Has been skipped
refactor(routing): rename local/claude to fast/thinking model pair
The routing decision is about reasoning capacity, not cost or provider.
Fast model (koala/qwen35-9b-fast) handles high-pass-rate calls; thinking
model (iguana/gemma4-26b) handles low-pass-rate calls. Removes the
implicit Anthropic dependency from the routing pod — both models go
through LiteLLM.

Renames: HYPERGUILD_LOCAL_MODEL → HYPERGUILD_FAST_MODEL,
HYPERGUILD_CLAUDE_MODEL → HYPERGUILD_THINKING_MODEL,
Router.LocalModel → FastModel, Router.ClaudeModel → ThinkingModel,
log decision "claude_fallback" → "thinking_fallback".

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-08 16:39:42 +02:00

137 lines
4.0 KiB
Go

package routing_test
import (
"context"
"encoding/json"
"errors"
"net/http"
"net/http/httptest"
"sync"
"testing"
"time"
"github.com/mathiasbq/supervisor/internal/routing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// fakeLLM is a test double for the router's completion function. It records
// every call it receives and answers with either a canned response or a
// configured error.
type fakeLLM struct {
	// mu guards calls; tests lock it before inspecting the recorded calls.
	mu sync.Mutex

	// calls holds one entry per Complete invocation, in call order.
	calls []struct{ Model, System, User string }

	// resp is the text returned by successful calls.
	resp string

	// err is the error returned by failing calls; nil means no call fails.
	err error

	// errOn, if non-empty, restricts err to calls for the named model.
	errOn string
}

// Complete records the call and returns the canned result for model.
//
// It fails with f.err when f.err is non-nil and either errOn is empty (every
// model fails) or errOn equals model. In all other cases it returns f.resp
// together with a fixed int64 of 100 and a nil error.
func (f *fakeLLM) Complete(_ context.Context, model, system, user string) (string, int64, error) {
	f.mu.Lock()
	defer f.mu.Unlock()

	f.calls = append(f.calls, struct{ Model, System, User string }{model, system, user})

	// Fail only when an error is actually configured. Without the nil check, a
	// call whose model equals errOn (including an empty model name against the
	// default empty errOn) would report an empty response with a nil error
	// instead of the canned one.
	if f.err != nil && (f.errOn == "" || f.errOn == model) {
		return "", 0, f.err
	}
	return f.resp, 100, nil
}
// newRouter builds a routing.Router wired to an in-process stub of the brain
// service and to llm as its completion backend.
//
// The stub answers "/pass-rate" with the supplied passRate and "/mcp" with an
// empty JSON-RPC result; any other path gets an empty default response. The
// stub is closed automatically through t.Cleanup. Both returned server values
// are the same stub.
func newRouter(t *testing.T, llm *fakeLLM, passRate float64) (*routing.Router, *httptest.Server, *httptest.Server) {
	t.Helper()

	handler := func(w http.ResponseWriter, req *http.Request) {
		var payload map[string]any
		switch req.URL.Path {
		case "/pass-rate":
			payload = map[string]any{"pass_rate": passRate}
		case "/mcp":
			payload = map[string]any{"jsonrpc": "2.0", "id": 1, "result": map[string]any{}}
		default:
			// Unknown paths write nothing and fall back to the default response.
			return
		}
		// The encode error is intentionally discarded: this is a best-effort stub.
		_ = json.NewEncoder(w).Encode(payload)
	}

	stub := httptest.NewServer(http.HandlerFunc(handler))
	t.Cleanup(stub.Close)

	router := &routing.Router{
		Fetcher: routing.NewFetcher(stub.URL, "7d", time.Minute),
		Logger:  routing.NewLogger(stub.URL),
		// NOTE(review): Floor is greater than Ceil here; confirm this matches
		// the field semantics of routing.Policy.
		Policy:        routing.Policy{Floor: 0.9, Ceil: 0.7},
		FastModel:     "koala/qwen35-9b-fast",
		ThinkingModel: "iguana/gemma4-26b",
		Complete:      llm.Complete,
	}
	return router, stub, stub
}
// TestRouterRoutesLocalAtHighPassRate checks that, with the brain reporting a
// 0.95 pass rate, the router makes exactly one completion call, to the fast
// model, and returns that model's response.
//
// NOTE(review): "Local" in the name predates the local-to-fast rename; the
// name is left as-is so existing -run filters keep matching.
func TestRouterRoutesLocalAtHighPassRate(t *testing.T) {
	const fastModel = "koala/qwen35-9b-fast"

	backend := &fakeLLM{resp: "ok"}
	router, _, _ := newRouter(t, backend, 0.95)

	in := routing.RunInput{
		Skill:       "review",
		System:      "sys",
		User:        "user",
		SessionID:   "s1",
		ProjectRoot: "/p",
	}
	got, _, err := router.Run(context.Background(), in)
	require.NoError(t, err)
	assert.Equal(t, "ok", got)

	// Hold the fake's lock while inspecting the calls it recorded.
	backend.mu.Lock()
	defer backend.mu.Unlock()
	require.Len(t, backend.calls, 1)
	assert.Equal(t, fastModel, backend.calls[0].Model)
}
// TestRouterRoutesThinkingAtLowPassRate checks that, with the brain reporting
// a 0.3 pass rate, the router makes exactly one completion call and that the
// call goes to the thinking model.
func TestRouterRoutesThinkingAtLowPassRate(t *testing.T) {
	const thinkingModel = "iguana/gemma4-26b"

	backend := &fakeLLM{resp: "ok"}
	router, _, _ := newRouter(t, backend, 0.3)

	in := routing.RunInput{
		Skill:     "review",
		System:    "sys",
		User:      "user",
		SessionID: "s2",
	}
	_, _, err := router.Run(context.Background(), in)
	require.NoError(t, err)

	// Hold the fake's lock while inspecting the calls it recorded.
	backend.mu.Lock()
	defer backend.mu.Unlock()
	require.Len(t, backend.calls, 1)
	assert.Equal(t, thinkingModel, backend.calls[0].Model)
}
// TestRouterFailsOpenFastErrorToThinking checks the fallback path: when the
// fast model's completion call errors, the router retries on the thinking
// model and returns that response without surfacing the error.
func TestRouterFailsOpenFastErrorToThinking(t *testing.T) {
	const (
		fastModel     = "koala/qwen35-9b-fast"
		thinkingModel = "iguana/gemma4-26b"
	)

	// Only calls to the fast model fail; every other model gets resp.
	backend := &fakeLLM{
		resp:  "ok-after-fallback",
		err:   errors.New("fast boom"),
		errOn: fastModel,
	}
	// A 0.95 pass rate would route to the fast model first.
	router, _, _ := newRouter(t, backend, 0.95)

	in := routing.RunInput{
		Skill:     "review",
		System:    "sys",
		User:      "user",
		SessionID: "s3",
	}
	got, _, err := router.Run(context.Background(), in)
	require.NoError(t, err)
	assert.Equal(t, "ok-after-fallback", got)

	// Hold the fake's lock while inspecting the calls it recorded.
	backend.mu.Lock()
	defer backend.mu.Unlock()

	// Fast is attempted first, then thinking.
	wantOrder := []string{fastModel, thinkingModel}
	require.Len(t, backend.calls, len(wantOrder))
	for i, want := range wantOrder {
		assert.Equal(t, want, backend.calls[i].Model)
	}
}
// TestRouterDefaultsToFastWhenBrainUnreachable checks that when every request
// to the brain fails, Run still succeeds and makes its single completion call
// to the fast model.
func TestRouterDefaultsToFastWhenBrainUnreachable(t *testing.T) {
	const fastModel = "koala/qwen35-9b-fast"

	// The brain answers every request with a 500, so the fetcher errors, the
	// router treats the pass rate as nil, and the fast model is chosen.
	alwaysDown := func(w http.ResponseWriter, _ *http.Request) {
		http.Error(w, "down", http.StatusInternalServerError)
	}
	brokenBrain := httptest.NewServer(http.HandlerFunc(alwaysDown))
	defer brokenBrain.Close()

	backend := &fakeLLM{resp: "ok"}
	router := &routing.Router{
		Fetcher:       routing.NewFetcher(brokenBrain.URL, "7d", time.Minute),
		Logger:        routing.NewLogger(brokenBrain.URL),
		Policy:        routing.Policy{Floor: 0.9, Ceil: 0.7},
		FastModel:     fastModel,
		ThinkingModel: "iguana/gemma4-26b",
		Complete:      backend.Complete,
	}

	in := routing.RunInput{
		Skill:     "review",
		System:    "sys",
		User:      "user",
		SessionID: "s4",
	}
	_, _, err := router.Run(context.Background(), in)
	require.NoError(t, err)

	// Hold the fake's lock while inspecting the calls it recorded.
	backend.mu.Lock()
	defer backend.mu.Unlock()
	require.Len(t, backend.calls, 1)
	assert.Equal(t, fastModel, backend.calls[0].Model)
}