Files
Mathias Bergqvist 5b207425ed
All checks were successful
CI / Lint / Test / Vet (pull_request) Successful in 10s
CI / Mirror to GitHub (pull_request) Has been skipped
refactor(routing): rename local/claude to fast/thinking model pair
The routing decision is about reasoning capacity, not cost or provider.
Fast model (koala/qwen35-9b-fast) handles high-pass-rate calls; thinking
model (iguana/gemma4-26b) handles low-pass-rate calls. Removes the
implicit Anthropic dependency from the routing pod — both models go
through LiteLLM.

Renames: HYPERGUILD_LOCAL_MODEL → HYPERGUILD_FAST_MODEL,
HYPERGUILD_CLAUDE_MODEL → HYPERGUILD_THINKING_MODEL,
Router.LocalModel → FastModel, Router.ClaudeModel → ThinkingModel,
log decision "claude_fallback" → "thinking_fallback".

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-08 16:39:42 +02:00

80 lines
2.1 KiB
Go

package routing
import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"time"
)
// LogEntry is one routing decision, in the form LogDecision reports it to the
// brain MCP.
type LogEntry struct {
	// SessionID is sent as the session_id argument of the session_log call.
	SessionID string
	// Skill is the original skill whose call was routed (e.g., "review").
	Skill string
	// Decision is "local", "thinking" or "thinking_fallback".
	// NOTE(review): "local" may predate the fast/thinking rename — confirm
	// against the values the router actually emits.
	Decision string
	// Message is free-form detail, e.g. "model=qwen35, pass_rate=0.94".
	Message string
	// ProjectRoot is sent as the project_root argument.
	ProjectRoot string
	// DurationMs is sent as the duration_ms argument.
	DurationMs int64
	// Failed selects the reported final_status: true → "fail", false → "skip".
	Failed bool
}
// Logger sends session_log entries to a brain MCP; requests are addressed to
// BrainURL + "/mcp".
type Logger struct {
	// BrainURL is the base URL of the brain MCP; "/mcp" is appended to it.
	BrainURL string
	// HTTP is the client used to send the request.
	HTTP *http.Client
}
// NewLogger returns a Logger for the brain MCP at brainURL whose HTTP client
// gives up after 2 seconds.
func NewLogger(brainURL string) *Logger {
	client := &http.Client{Timeout: 2 * time.Second}
	return &Logger{BrainURL: brainURL, HTTP: client}
}
// LogDecision posts a single session_log tool call to the brain MCP endpoint
// at BrainURL + "/mcp", encoded as a JSON-RPC 2.0 "tools/call" request.
//
// The entry is always recorded under skill "_routing" and phase "decide"; the
// routed skill, the decision and the free-form message are folded into the
// "message" argument. final_status is "fail" when e.Failed is set and "skip"
// otherwise.
//
// It returns an error when the payload cannot be marshalled, the request
// cannot be built or sent, or the server answers with anything other than
// 200 OK. Logging is best-effort: the caller MUST NOT block real work on the
// returned error.
func (l *Logger) LogDecision(ctx context.Context, e LogEntry) error {
	const (
		// fallbackTimeout mirrors the timeout NewLogger configures.
		fallbackTimeout = 2 * time.Second
		// maxDrain bounds how much of the response is read before closing.
		maxDrain = 64 << 10
	)

	status := "skip"
	if e.Failed {
		status = "fail"
	}

	payload := map[string]any{
		"jsonrpc": "2.0",
		"id":      1,
		"method":  "tools/call",
		"params": map[string]any{
			"name": "session_log",
			"arguments": map[string]any{
				"session_id":   e.SessionID,
				"skill":        "_routing",
				"phase":        "decide",
				"final_status": status,
				"message":      fmt.Sprintf("%s: %s — %s", e.Skill, e.Decision, e.Message),
				"duration_ms":  e.DurationMs,
				"project_root": e.ProjectRoot,
			},
		},
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("log: marshal: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, l.BrainURL+"/mcp", bytes.NewReader(body))
	if err != nil {
		return fmt.Errorf("log: build request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	client := l.HTTP
	if client == nil {
		// Logger's fields are exported, so it can be built without NewLogger;
		// use a bounded client instead of dereferencing a nil one.
		client = &http.Client{Timeout: fallbackTimeout}
	}

	resp, err := client.Do(req)
	if err != nil {
		return fmt.Errorf("log: request: %w", err)
	}
	defer func() {
		// Drain before closing so the transport can reuse the connection for
		// the next log call; the limit keeps an oversized reply from stalling
		// a best-effort logger. Errors here are deliberately ignored.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, maxDrain))
		_ = resp.Body.Close()
	}()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("log: server returned status %d", resp.StatusCode)
	}
	return nil
}