feat(exec): add LiteLLM HTTP executor for local model dispatch
This commit is contained in:
103
internal/exec/litellm.go
Normal file
103
internal/exec/litellm.go
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
package exec
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// LiteLLMExecutor calls a LiteLLM-compatible /v1/chat/completions endpoint.
|
||||||
|
// Local models are expected to return a JSON object matching the Result schema
|
||||||
|
// as their response content — no envelope.
|
||||||
|
type LiteLLMExecutor struct {
|
||||||
|
baseURL string
|
||||||
|
apiKey string
|
||||||
|
httpClient *http.Client
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewLiteLLM creates a LiteLLMExecutor.
|
||||||
|
// timeout applies to the full HTTP round-trip per call.
|
||||||
|
func NewLiteLLM(baseURL, apiKey string, timeout time.Duration) *LiteLLMExecutor {
|
||||||
|
return &LiteLLMExecutor{
|
||||||
|
baseURL: baseURL,
|
||||||
|
apiKey: apiKey,
|
||||||
|
httpClient: &http.Client{Timeout: timeout},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type litellmMessage struct {
|
||||||
|
Role string `json:"role"`
|
||||||
|
Content string `json:"content"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type litellmRequest struct {
|
||||||
|
Model string `json:"model"`
|
||||||
|
Messages []litellmMessage `json:"messages"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type litellmChoice struct {
|
||||||
|
Message litellmMessage `json:"message"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type litellmResponse struct {
|
||||||
|
Choices []litellmChoice `json:"choices"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run dispatches req to the LiteLLM server and parses the Result from the
|
||||||
|
// assistant message content. Returns an error on network failure, non-200
|
||||||
|
// status, or unparseable/invalid JSON — all of which the Orchestrator treats
|
||||||
|
// as automatic escalation triggers.
|
||||||
|
func (e *LiteLLMExecutor) Run(ctx context.Context, req Request) (Result, error) {
|
||||||
|
body := litellmRequest{
|
||||||
|
Model: req.Model,
|
||||||
|
Messages: []litellmMessage{
|
||||||
|
{Role: "system", Content: req.SkillPrompt},
|
||||||
|
{Role: "user", Content: req.TaskPrompt},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
bodyBytes, err := json.Marshal(body)
|
||||||
|
if err != nil {
|
||||||
|
return Result{}, fmt.Errorf("litellm: marshal request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, e.baseURL+"/v1/chat/completions", bytes.NewReader(bodyBytes))
|
||||||
|
if err != nil {
|
||||||
|
return Result{}, fmt.Errorf("litellm: create request: %w", err)
|
||||||
|
}
|
||||||
|
httpReq.Header.Set("Content-Type", "application/json")
|
||||||
|
if e.apiKey != "" {
|
||||||
|
httpReq.Header.Set("Authorization", "Bearer "+e.apiKey)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := e.httpClient.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return Result{}, fmt.Errorf("litellm: request failed: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close() //nolint:errcheck
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return Result{}, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
var chatResp litellmResponse
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil {
|
||||||
|
return Result{}, fmt.Errorf("litellm: decode response: %w", err)
|
||||||
|
}
|
||||||
|
if len(chatResp.Choices) == 0 {
|
||||||
|
return Result{}, fmt.Errorf("litellm: no choices in response")
|
||||||
|
}
|
||||||
|
|
||||||
|
content := chatResp.Choices[0].Message.Content
|
||||||
|
var result Result
|
||||||
|
if err := json.Unmarshal([]byte(content), &result); err != nil {
|
||||||
|
return Result{}, fmt.Errorf("litellm: parse result JSON: %w — content: %s", err, content)
|
||||||
|
}
|
||||||
|
if err := result.Validate(); err != nil {
|
||||||
|
return Result{}, fmt.Errorf("litellm: invalid result: %w", err)
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
112
internal/exec/litellm_test.go
Normal file
112
internal/exec/litellm_test.go
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
package exec_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func validLiteLLMResult() iexec.Result {
|
||||||
|
return iexec.Result{
|
||||||
|
Status: "pass",
|
||||||
|
Phase: "review",
|
||||||
|
Skill: "review",
|
||||||
|
ModelUsed: "ollama/devstral",
|
||||||
|
Message: "looks good",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func chatResponseFor(t *testing.T, result iexec.Result) []byte {
|
||||||
|
t.Helper()
|
||||||
|
content, err := json.Marshal(result)
|
||||||
|
require.NoError(t, err)
|
||||||
|
resp := map[string]any{
|
||||||
|
"choices": []map[string]any{
|
||||||
|
{"message": map[string]any{"role": "assistant", "content": string(content)}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
data, err := json.Marshal(resp)
|
||||||
|
require.NoError(t, err)
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLiteLLMParsesValidResult(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
assert.Equal(t, "/v1/chat/completions", r.URL.Path)
|
||||||
|
assert.Equal(t, "application/json", r.Header.Get("Content-Type"))
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write(chatResponseFor(t, validLiteLLMResult()))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||||
|
result, err := ex.Run(context.Background(), iexec.Request{
|
||||||
|
SkillPrompt: "review rules",
|
||||||
|
TaskPrompt: "review the code",
|
||||||
|
Model: "ollama/devstral",
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "pass", result.Status)
|
||||||
|
assert.Equal(t, "review", result.Skill)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLiteLLMSendsAuthHeader(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
assert.Equal(t, "Bearer secret", r.Header.Get("Authorization"))
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write(chatResponseFor(t, validLiteLLMResult()))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
ex := iexec.NewLiteLLM(srv.URL, "secret", 5*time.Second)
|
||||||
|
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t", SkillPrompt: "s"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLiteLLMErrorOnNonOKStatus(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusServiceUnavailable)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||||
|
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"})
|
||||||
|
assert.ErrorContains(t, err, "503")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLiteLLMErrorOnUnparsableJSON(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
resp := map[string]any{
|
||||||
|
"choices": []map[string]any{
|
||||||
|
{"message": map[string]any{"role": "assistant", "content": "not json at all"}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
data, _ := json.Marshal(resp)
|
||||||
|
_, _ = w.Write(data)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||||
|
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"})
|
||||||
|
assert.Error(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLiteLLMRespectsContextCancellation(t *testing.T) {
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
cancel() // Cancel immediately
|
||||||
|
|
||||||
|
ex := iexec.NewLiteLLM("http://invalid.example.com", "", 1*time.Second)
|
||||||
|
_, err := ex.Run(ctx, iexec.Request{Model: "x", TaskPrompt: "t"})
|
||||||
|
assert.Error(t, err)
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user