refactor(routing): rename local/claude to fast/thinking model pair
The routing decision is about reasoning capacity, not cost or provider. The fast model (koala/qwen35-9b-fast) handles high-pass-rate calls; the thinking model (iguana/gemma4-26b) handles low-pass-rate calls. This removes the implicit Anthropic dependency from the routing pod — both models go through LiteLLM. Renames: HYPERGUILD_LOCAL_MODEL → HYPERGUILD_FAST_MODEL; HYPERGUILD_CLAUDE_MODEL → HYPERGUILD_THINKING_MODEL; Router.LocalModel → FastModel; Router.ClaudeModel → ThinkingModel; log decision "claude_fallback" → "thinking_fallback". Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -14,8 +14,8 @@ type RoutingConfig struct {
|
||||
LiteLLMBaseURL string // LITELLM_BASE_URL, default http://piguard:4000
|
||||
LiteLLMAPIKey string // LITELLM_API_KEY
|
||||
BrainURL string // BRAIN_URL, default http://ingestion.supervisor:3300
|
||||
LocalModel string // HYPERGUILD_LOCAL_MODEL, default qwen35
|
||||
ClaudeModel string // HYPERGUILD_CLAUDE_MODEL, default claude-sonnet-4-6
|
||||
FastModel string // HYPERGUILD_FAST_MODEL, default koala/qwen35-9b-fast
|
||||
ThinkingModel string // HYPERGUILD_THINKING_MODEL, default iguana/gemma4-26b
|
||||
// RouteLocalFloor and RouteLocalCeil intentionally invert the usual
|
||||
// floor < ceil mathematical convention: Floor (default 0.90) is the
|
||||
// UPPER boundary — at/above it, always route local; Ceil (default 0.70)
|
||||
@@ -34,8 +34,8 @@ func LoadRouting() (RoutingConfig, error) {
|
||||
LiteLLMBaseURL: envOr("LITELLM_BASE_URL", "http://piguard:4000"),
|
||||
LiteLLMAPIKey: os.Getenv("LITELLM_API_KEY"),
|
||||
BrainURL: envOr("BRAIN_URL", "http://ingestion.supervisor:3300"),
|
||||
LocalModel: envOr("HYPERGUILD_LOCAL_MODEL", "qwen35"),
|
||||
ClaudeModel: envOr("HYPERGUILD_CLAUDE_MODEL", "claude-sonnet-4-6"),
|
||||
FastModel: envOr("HYPERGUILD_FAST_MODEL", "koala/qwen35-9b-fast"),
|
||||
ThinkingModel: envOr("HYPERGUILD_THINKING_MODEL", "iguana/gemma4-26b"),
|
||||
}
|
||||
|
||||
floor, err := parseFloatEnv("HYPERGUILD_ROUTE_LOCAL_FLOOR", 0.90)
|
||||
|
||||
@@ -11,7 +11,7 @@ import (
|
||||
func TestLoadRoutingDefaults(t *testing.T) {
|
||||
for _, k := range []string{
|
||||
"ROUTING_PORT", "ROUTING_MCP_TOKEN", "LITELLM_BASE_URL", "LITELLM_API_KEY",
|
||||
"BRAIN_URL", "HYPERGUILD_LOCAL_MODEL", "HYPERGUILD_CLAUDE_MODEL",
|
||||
"BRAIN_URL", "HYPERGUILD_FAST_MODEL", "HYPERGUILD_THINKING_MODEL",
|
||||
"HYPERGUILD_ROUTE_LOCAL_FLOOR", "HYPERGUILD_ROUTE_LOCAL_CEIL",
|
||||
"HYPERGUILD_PASS_RATE_TTL_SECONDS",
|
||||
} {
|
||||
@@ -24,8 +24,8 @@ func TestLoadRoutingDefaults(t *testing.T) {
|
||||
assert.Equal(t, "", cfg.MCPAuthToken)
|
||||
assert.Equal(t, "http://piguard:4000", cfg.LiteLLMBaseURL)
|
||||
assert.Equal(t, "http://ingestion.supervisor:3300", cfg.BrainURL)
|
||||
assert.Equal(t, "qwen35", cfg.LocalModel)
|
||||
assert.Equal(t, "claude-sonnet-4-6", cfg.ClaudeModel)
|
||||
assert.Equal(t, "koala/qwen35-9b-fast", cfg.FastModel)
|
||||
assert.Equal(t, "iguana/gemma4-26b", cfg.ThinkingModel)
|
||||
assert.InDelta(t, 0.90, cfg.RouteLocalFloor, 1e-9)
|
||||
assert.InDelta(t, 0.70, cfg.RouteLocalCeil, 1e-9)
|
||||
assert.Equal(t, 60, cfg.PassRateTTLSeconds)
|
||||
@@ -38,8 +38,8 @@ func TestLoadRoutingFromEnv(t *testing.T) {
|
||||
t.Setenv("LITELLM_BASE_URL", "http://localhost:4000")
|
||||
t.Setenv("LITELLM_API_KEY", "lk")
|
||||
t.Setenv("BRAIN_URL", "http://localhost:3300")
|
||||
t.Setenv("HYPERGUILD_LOCAL_MODEL", "qwen2-7b")
|
||||
t.Setenv("HYPERGUILD_CLAUDE_MODEL", "claude-opus-4-7")
|
||||
t.Setenv("HYPERGUILD_FAST_MODEL", "koala/phi4-14b")
|
||||
t.Setenv("HYPERGUILD_THINKING_MODEL", "iguana/qwen3-14b-think")
|
||||
t.Setenv("HYPERGUILD_ROUTE_LOCAL_FLOOR", "0.85")
|
||||
t.Setenv("HYPERGUILD_ROUTE_LOCAL_CEIL", "0.65")
|
||||
t.Setenv("HYPERGUILD_PASS_RATE_TTL_SECONDS", "30")
|
||||
@@ -51,8 +51,8 @@ func TestLoadRoutingFromEnv(t *testing.T) {
|
||||
assert.Equal(t, "http://localhost:4000", cfg.LiteLLMBaseURL)
|
||||
assert.Equal(t, "lk", cfg.LiteLLMAPIKey)
|
||||
assert.Equal(t, "http://localhost:3300", cfg.BrainURL)
|
||||
assert.Equal(t, "qwen2-7b", cfg.LocalModel)
|
||||
assert.Equal(t, "claude-opus-4-7", cfg.ClaudeModel)
|
||||
assert.Equal(t, "koala/phi4-14b", cfg.FastModel)
|
||||
assert.Equal(t, "iguana/qwen3-14b-think", cfg.ThinkingModel)
|
||||
assert.InDelta(t, 0.85, cfg.RouteLocalFloor, 1e-9)
|
||||
assert.InDelta(t, 0.65, cfg.RouteLocalCeil, 1e-9)
|
||||
assert.Equal(t, 30, cfg.PassRateTTLSeconds)
|
||||
|
||||
Reference in New Issue
Block a user