refactor: replace orchestrator/verifier chain with direct LiteLLM calls

Drop the three-layer Claude subprocess orchestration (local model → Claude verifier → cloud escalation). Skills now call LiteLLM directly and return plain text to Claude Code, which decides what to do with it.

- Delete executor, orchestrator, verifier, result, attempts packages
- Simplify LiteLLMExecutor: Run(Request)→Result becomes Complete(model,sys,user)→(string,int64,error)
- Replace ExecutorFn with CompleteFunc in all 6 skill configs
- Rewrite all skill handlers to call Complete and return {"text","model","duration_ms"}
- Simplify config/models: remove Verifier/LlamaSwapURL, add ModelFor
- Bump version to v0.5.0

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-22 16:19:09 +02:00
parent 823de23213
commit ce45592730
34 changed files with 266 additions and 1432 deletions
--- a/internal/exec/litellm.go
+++ b/internal/exec/litellm.go
@@ -9,9 +9,8 @@ import (
 	"time"
 )

-// LiteLLMExecutor calls a LiteLLM-compatible /v1/chat/completions endpoint.
-// Local models are expected to return a JSON object matching the Result schema
-// as their response content — no envelope.
+// LiteLLMExecutor calls a LiteLLM-compatible /v1/chat/completions endpoint
+// and returns the raw assistant message text.
 type LiteLLMExecutor struct {
 	baseURL    string
 	apiKey     string
@@ -21,9 +20,12 @@ type LiteLLMExecutor struct {
 // NewLiteLLM creates a LiteLLMExecutor.
 // timeout applies to the full HTTP round-trip per call.
 func NewLiteLLM(baseURL, apiKey string, timeout time.Duration) *LiteLLMExecutor {
+	if timeout == 0 {
+		timeout = 120 * time.Second
+	}
 	return &LiteLLMExecutor{
-		baseURL: baseURL,
-		apiKey:  apiKey,
+		baseURL:    baseURL,
+		apiKey:     apiKey,
 		httpClient: &http.Client{Timeout: timeout},
 	}
 }
@@ -46,58 +48,50 @@ type litellmResponse struct {
 	Choices []litellmChoice `json:"choices"`
 }

-// Run dispatches req to the LiteLLM server and parses the Result from the
-// assistant message content. Returns an error on network failure, non-200
-// status, or unparseable/invalid JSON — all of which the Orchestrator treats
-// as automatic escalation triggers.
-func (e *LiteLLMExecutor) Run(ctx context.Context, req Request) (Result, error) {
+// Complete sends system+user messages to the given model and returns the raw
+// assistant text along with the round-trip duration in milliseconds.
+func (e *LiteLLMExecutor) Complete(ctx context.Context, model, system, user string) (string, int64, error) {
 	body := litellmRequest{
-		Model: req.Model,
+		Model: model,
 		Messages: []litellmMessage{
-			{Role: "system", Content: req.SkillPrompt},
-			{Role: "user", Content: req.TaskPrompt},
+			{Role: "system", Content: system},
+			{Role: "user", Content: user},
 		},
 	}

 	bodyBytes, err := json.Marshal(body)
 	if err != nil {
-		return Result{}, fmt.Errorf("litellm: marshal request: %w", err)
+		return "", 0, fmt.Errorf("litellm: marshal request: %w", err)
 	}

 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, e.baseURL+"/v1/chat/completions", bytes.NewReader(bodyBytes))
 	if err != nil {
-		return Result{}, fmt.Errorf("litellm: create request: %w", err)
+		return "", 0, fmt.Errorf("litellm: create request: %w", err)
 	}
 	httpReq.Header.Set("Content-Type", "application/json")
 	if e.apiKey != "" {
 		httpReq.Header.Set("Authorization", "Bearer "+e.apiKey)
 	}

+	t0 := time.Now()
 	resp, err := e.httpClient.Do(httpReq)
 	if err != nil {
-		return Result{}, fmt.Errorf("litellm: request failed: %w", err)
+		return "", 0, fmt.Errorf("litellm: request failed: %w", err)
 	}
 	defer resp.Body.Close() //nolint:errcheck
+	durationMs := time.Since(t0).Milliseconds()

 	if resp.StatusCode != http.StatusOK {
-		return Result{}, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
+		return "", 0, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
 	}

 	var chatResp litellmResponse
 	if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil {
-		return Result{}, fmt.Errorf("litellm: decode response: %w", err)
+		return "", 0, fmt.Errorf("litellm: decode response: %w", err)
 	}
 	if len(chatResp.Choices) == 0 {
-		return Result{}, fmt.Errorf("litellm: no choices in response")
+		return "", 0, fmt.Errorf("litellm: no choices in response")
 	}

-	content := chatResp.Choices[0].Message.Content
-	var result Result
-	if err := json.Unmarshal([]byte(content), &result); err != nil {
-		return Result{}, fmt.Errorf("litellm: parse result JSON: %w — content: %s", err, content)
-	}
-	if err := result.Validate(); err != nil {
-		return Result{}, fmt.Errorf("litellm: invalid result: %w", err)
-	}
-	return result, nil
+	return chatResp.Choices[0].Message.Content, durationMs, nil
 }