Drop the three-layer Claude subprocess orchestration (local model →
Claude verifier → cloud escalation). Skills now call LiteLLM directly
and return plain text to Claude Code, which decides what to do with it.
- Delete executor, orchestrator, verifier, result, attempts packages
- Simplify LiteLLMExecutor: Run(Request)→Result becomes Complete(model,sys,user)→(string,int64,error)
- Replace ExecutorFn with CompleteFunc in all 6 skill configs
- Rewrite all skill handlers to call Complete and return {"text","model","duration_ms"}
- Simplify config/models: remove Verifier/LlamaSwapURL, add ModelFor
- Bump version to v0.5.0
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
98 lines
2.5 KiB
Go
98 lines
2.5 KiB
Go
package exec
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/http"
|
|
"time"
|
|
)
|
|
|
|
// LiteLLMExecutor is a small client for a LiteLLM-compatible
// /v1/chat/completions endpoint. It hands back the assistant message text
// without any further processing.
type LiteLLMExecutor struct {
	// baseURL is the server root; the endpoint path is appended to it.
	baseURL string
	// apiKey is sent as a bearer token when it is non-empty.
	apiKey string
	// httpClient performs every request issued by this executor.
	httpClient *http.Client
}
|
|
|
|
// NewLiteLLM creates a LiteLLMExecutor.
|
|
// timeout applies to the full HTTP round-trip per call.
|
|
func NewLiteLLM(baseURL, apiKey string, timeout time.Duration) *LiteLLMExecutor {
|
|
if timeout == 0 {
|
|
timeout = 120 * time.Second
|
|
}
|
|
return &LiteLLMExecutor{
|
|
baseURL: baseURL,
|
|
apiKey: apiKey,
|
|
httpClient: &http.Client{Timeout: timeout},
|
|
}
|
|
}
|
|
|
|
// litellmMessage is a single chat message. It is serialised as a JSON object
// with the keys "role" and "content".
type litellmMessage struct {
	// Role names the speaker of the message.
	Role string `json:"role"`
	// Content is the message text.
	Content string `json:"content"`
}
|
|
|
|
type litellmRequest struct {
|
|
Model string `json:"model"`
|
|
Messages []litellmMessage `json:"messages"`
|
|
}
|
|
|
|
type litellmChoice struct {
|
|
Message litellmMessage `json:"message"`
|
|
}
|
|
|
|
type litellmResponse struct {
|
|
Choices []litellmChoice `json:"choices"`
|
|
}
|
|
|
|
// Complete sends system+user messages to the given model and returns the raw
|
|
// assistant text along with the round-trip duration in milliseconds.
|
|
func (e *LiteLLMExecutor) Complete(ctx context.Context, model, system, user string) (string, int64, error) {
|
|
body := litellmRequest{
|
|
Model: model,
|
|
Messages: []litellmMessage{
|
|
{Role: "system", Content: system},
|
|
{Role: "user", Content: user},
|
|
},
|
|
}
|
|
|
|
bodyBytes, err := json.Marshal(body)
|
|
if err != nil {
|
|
return "", 0, fmt.Errorf("litellm: marshal request: %w", err)
|
|
}
|
|
|
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, e.baseURL+"/v1/chat/completions", bytes.NewReader(bodyBytes))
|
|
if err != nil {
|
|
return "", 0, fmt.Errorf("litellm: create request: %w", err)
|
|
}
|
|
httpReq.Header.Set("Content-Type", "application/json")
|
|
if e.apiKey != "" {
|
|
httpReq.Header.Set("Authorization", "Bearer "+e.apiKey)
|
|
}
|
|
|
|
t0 := time.Now()
|
|
resp, err := e.httpClient.Do(httpReq)
|
|
if err != nil {
|
|
return "", 0, fmt.Errorf("litellm: request failed: %w", err)
|
|
}
|
|
defer resp.Body.Close() //nolint:errcheck
|
|
durationMs := time.Since(t0).Milliseconds()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return "", 0, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
|
|
}
|
|
|
|
var chatResp litellmResponse
|
|
if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil {
|
|
return "", 0, fmt.Errorf("litellm: decode response: %w", err)
|
|
}
|
|
if len(chatResp.Choices) == 0 {
|
|
return "", 0, fmt.Errorf("litellm: no choices in response")
|
|
}
|
|
|
|
return chatResp.Choices[0].Message.Content, durationMs, nil
|
|
}
|