feat(config): replace single-model config with chain-based routing
Implements escalation chains per skill with three-layer priority:

1. Caller override (model param) — no escalation
2. Per-skill chain from models.yaml
3. default_chain fallback

New APIs:

- Verifier() — fixed verifier for output validation
- LlamaSwapURL() — base URL for warm-state probing
- ChainFor(skill, override) — ordered model list for escalation

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,13 +1,41 @@
|
|||||||
# Model routing table — three-layer priority:
|
# Model routing chains — three-layer priority:
|
||||||
# 1. model param in MCP tool call (caller override)
|
# 1. model param in MCP tool call (caller override — collapses to single entry, no escalation)
|
||||||
# 2. per-skill entry here
|
# 2. per-skill chain here
|
||||||
# 3. default (fallback)
|
# 3. default_chain fallback
|
||||||
default: ollama/qwen3-coder-30b-tuned
|
|
||||||
|
verifier: claude-sonnet-4-6 # fixed verifier for all local tiers
|
||||||
|
|
||||||
|
llama_swap_url: http://koala:8080 # for warm-state probing
|
||||||
|
|
||||||
|
default_chain:
|
||||||
|
- ollama/qwen3-coder-30b-tuned
|
||||||
|
- claude-sonnet-4-6
|
||||||
|
|
||||||
skills:
|
skills:
|
||||||
tdd: ollama/qwen3-coder-30b-tuned
|
tdd:
|
||||||
review: ollama/devstral-tuned
|
chain:
|
||||||
debug: ollama/deepseek-r1-tuned
|
- ollama/qwen3-coder-30b-tuned
|
||||||
retrospective: ollama/qwen3-coder-30b-tuned
|
- claude-sonnet-4-6
|
||||||
spec: ollama/qwen3-coder-30b-tuned
|
review:
|
||||||
trainer: ollama/qwen3-coder-30b-tuned
|
chain:
|
||||||
|
- ollama/devstral-tuned
|
||||||
|
- ollama/gemma4
|
||||||
|
- claude-sonnet-4-6
|
||||||
|
debug:
|
||||||
|
chain:
|
||||||
|
- ollama/deepseek-r1-tuned
|
||||||
|
- claude-sonnet-4-6
|
||||||
|
spec:
|
||||||
|
chain:
|
||||||
|
- ollama/phi4
|
||||||
|
- ollama/gemma4
|
||||||
|
- claude-sonnet-4-6
|
||||||
|
- claude-opus-4-6
|
||||||
|
retrospective:
|
||||||
|
chain:
|
||||||
|
- ollama/qwen3-coder-30b-tuned
|
||||||
|
- claude-sonnet-4-6
|
||||||
|
trainer:
|
||||||
|
chain:
|
||||||
|
- ollama/qwen3-coder-30b-tuned
|
||||||
|
- claude-sonnet-4-6
|
||||||
|
|||||||
@@ -7,9 +7,15 @@ import (
|
|||||||
"gopkg.in/yaml.v3"
|
"gopkg.in/yaml.v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type skillChain struct {
|
||||||
|
Chain []string `yaml:"chain"`
|
||||||
|
}
|
||||||
|
|
||||||
type modelsFile struct {
|
type modelsFile struct {
|
||||||
Default string `yaml:"default"`
|
Verifier string `yaml:"verifier"`
|
||||||
Skills map[string]string `yaml:"skills"`
|
LlamaSwapURL string `yaml:"llama_swap_url"`
|
||||||
|
DefaultChain []string `yaml:"default_chain"`
|
||||||
|
Skills map[string]skillChain `yaml:"skills"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type Models struct {
|
type Models struct {
|
||||||
@@ -28,16 +34,23 @@ func LoadModels(path string) (Models, error) {
|
|||||||
return Models{data: f}, nil
|
return Models{data: f}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Resolve returns the model for a skill, respecting three-layer priority:
|
// Verifier returns the model name to use for all local-tier output verification.
|
||||||
// 1. override (from MCP call) — highest
|
func (m Models) Verifier() string { return m.data.Verifier }
|
||||||
// 2. per-skill default from models.yaml
|
|
||||||
// 3. global default
|
// LlamaSwapURL returns the llama-swap base URL for warm-state probing.
|
||||||
func (m Models) Resolve(skill, override string) string {
|
func (m Models) LlamaSwapURL() string { return m.data.LlamaSwapURL }
|
||||||
|
|
||||||
|
// ChainFor returns the ordered list of model names for a skill.
|
||||||
|
// If override is non-empty, returns a single-entry chain (no escalation).
|
||||||
|
// Falls back to default_chain when the skill has no explicit entry.
|
||||||
|
func (m Models) ChainFor(skill, override string) []string {
|
||||||
if override != "" {
|
if override != "" {
|
||||||
return override
|
return []string{override}
|
||||||
}
|
}
|
||||||
if model, ok := m.data.Skills[skill]; ok {
|
if sc, ok := m.data.Skills[skill]; ok && len(sc.Chain) > 0 {
|
||||||
return model
|
return append([]string(nil), sc.Chain...) // copy, as the default_chain path does, so callers cannot mutate the loaded config
|
||||||
}
|
}
|
||||||
return m.data.Default
|
out := make([]string, len(m.data.DefaultChain))
|
||||||
|
copy(out, m.data.DefaultChain)
|
||||||
|
return out
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,35 +10,71 @@ import (
|
|||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestModelsResolve(t *testing.T) {
|
const testYAML = `
|
||||||
yaml := `
|
verifier: claude-sonnet-4-6
|
||||||
default: ollama/default-model
|
llama_swap_url: http://koala:8080
|
||||||
|
|
||||||
|
default_chain:
|
||||||
|
- ollama/qwen3-coder-30b-tuned
|
||||||
|
- claude-sonnet-4-6
|
||||||
|
|
||||||
skills:
|
skills:
|
||||||
tdd: ollama/qwen3-coder-30b-tuned
|
review:
|
||||||
review: ollama/devstral-tuned
|
chain:
|
||||||
|
- ollama/devstral-tuned
|
||||||
|
- ollama/gemma4
|
||||||
|
- claude-sonnet-4-6
|
||||||
|
spec:
|
||||||
|
chain:
|
||||||
|
- ollama/phi4
|
||||||
|
- claude-opus-4-6
|
||||||
`
|
`
|
||||||
|
|
||||||
|
func writeModels(t *testing.T, content string) string {
|
||||||
|
t.Helper()
|
||||||
f := filepath.Join(t.TempDir(), "models.yaml")
|
f := filepath.Join(t.TempDir(), "models.yaml")
|
||||||
require.NoError(t, os.WriteFile(f, []byte(yaml), 0644))
|
require.NoError(t, os.WriteFile(f, []byte(content), 0644))
|
||||||
|
return f
|
||||||
m, err := config.LoadModels(f)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
assert.Equal(t, "ollama/qwen3-coder-30b-tuned", m.Resolve("tdd", ""))
|
|
||||||
assert.Equal(t, "ollama/devstral-tuned", m.Resolve("review", ""))
|
|
||||||
assert.Equal(t, "ollama/default-model", m.Resolve("unknown", ""))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestModelsOverride(t *testing.T) {
|
func TestModelsVerifier(t *testing.T) {
|
||||||
yaml := `
|
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||||
default: ollama/default-model
|
require.NoError(t, err)
|
||||||
skills:
|
assert.Equal(t, "claude-sonnet-4-6", m.Verifier())
|
||||||
tdd: ollama/qwen3-coder-30b-tuned
|
}
|
||||||
`
|
|
||||||
f := filepath.Join(t.TempDir(), "models.yaml")
|
|
||||||
require.NoError(t, os.WriteFile(f, []byte(yaml), 0644))
|
|
||||||
|
|
||||||
m, err := config.LoadModels(f)
|
func TestModelsLlamaSwapURL(t *testing.T) {
|
||||||
|
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "http://koala:8080", m.LlamaSwapURL())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestModelsChainForSkillOverride(t *testing.T) {
|
||||||
|
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
assert.Equal(t, "anthropic/claude-sonnet-4-6", m.Resolve("tdd", "anthropic/claude-sonnet-4-6"))
|
chain := m.ChainFor("review", "")
|
||||||
|
require.Len(t, chain, 3)
|
||||||
|
assert.Equal(t, "ollama/devstral-tuned", chain[0])
|
||||||
|
assert.Equal(t, "ollama/gemma4", chain[1])
|
||||||
|
assert.Equal(t, "claude-sonnet-4-6", chain[2])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestModelsChainForDefaultFallback(t *testing.T) {
|
||||||
|
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
chain := m.ChainFor("trainer", "") // not in skills map
|
||||||
|
require.Len(t, chain, 2)
|
||||||
|
assert.Equal(t, "ollama/qwen3-coder-30b-tuned", chain[0])
|
||||||
|
assert.Equal(t, "claude-sonnet-4-6", chain[1])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestModelsChainForCallerOverride(t *testing.T) {
|
||||||
|
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
chain := m.ChainFor("review", "claude-opus-4-6")
|
||||||
|
require.Len(t, chain, 1)
|
||||||
|
assert.Equal(t, "claude-opus-4-6", chain[0])
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user