// hyperguild/internal/exec/litellm.go

package exec

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"
)

// LiteLLMExecutor calls a LiteLLM-compatible /v1/chat/completions endpoint
// and returns the raw assistant message text.
//
// Construct it with NewLiteLLM; the zero value has a nil httpClient and
// cannot issue requests.
type LiteLLMExecutor struct {
	// baseURL is the server address to which "/v1/chat/completions" is appended.
	baseURL    string
	// apiKey, when non-empty, is sent as an "Authorization: Bearer" header.
	apiKey     string
	// httpClient performs the requests; NewLiteLLM configures its Timeout.
	httpClient *http.Client
}

// NewLiteLLM creates a LiteLLMExecutor that sends its requests to baseURL.
//
// apiKey may be empty, in which case requests carry no Authorization header.
// timeout is installed as the http.Client Timeout and therefore bounds each
// call as a whole. A zero or negative timeout selects the 120-second default:
// http.Client treats a non-positive Timeout as "no limit", which would let a
// stalled server block a call indefinitely.
func NewLiteLLM(baseURL, apiKey string, timeout time.Duration) *LiteLLMExecutor {
	const defaultTimeout = 120 * time.Second

	if timeout <= 0 {
		timeout = defaultTimeout
	}
	return &LiteLLMExecutor{
		baseURL:    baseURL,
		apiKey:     apiKey,
		httpClient: &http.Client{Timeout: timeout},
	}
}

// JSON wire types for the chat-completions exchange. Each struct declares
// only the fields that Complete writes or reads.
type (
	// litellmMessage is a single chat message: who said it and what was said.
	litellmMessage struct {
		Role    string `json:"role"`
		Content string `json:"content"`
	}

	// litellmRequest is the request body: the target model plus the ordered
	// list of messages.
	litellmRequest struct {
		Model    string           `json:"model"`
		Messages []litellmMessage `json:"messages"`
	}

	// litellmChoice is one entry of the response's "choices" array.
	litellmChoice struct {
		Message litellmMessage `json:"message"`
	}

	// litellmResponse is the response body, reduced to its "choices" array.
	litellmResponse struct {
		Choices []litellmChoice `json:"choices"`
	}
)

// Complete sends a system message and a user message to the given model via
// POST {baseURL}/v1/chat/completions and returns the content of the first
// choice's message together with the elapsed time in milliseconds.
//
// Trailing slashes on the configured base URL are ignored, so both
// "http://host" and "http://host/" address the same endpoint. The elapsed
// time spans sending the request through decoding the response body.
//
// On failure the returned text is "" and the duration is 0. Errors are
// returned when the request cannot be built or sent, when the server answers
// with a status other than 200 (the error includes the status code and a
// size-capped excerpt of the response body), when the body is not valid JSON,
// or when the response contains no choices.
func (e *LiteLLMExecutor) Complete(ctx context.Context, model, system, user string) (string, int64, error) {
	// Upper bound on how much of an error response is copied into the error text.
	const maxErrBody = 4 << 10

	payload, err := json.Marshal(litellmRequest{
		Model: model,
		Messages: []litellmMessage{
			{Role: "system", Content: system},
			{Role: "user", Content: user},
		},
	})
	if err != nil {
		return "", 0, fmt.Errorf("litellm: marshal request: %w", err)
	}

	// Normalize the base so a trailing slash does not produce "//v1/...".
	endpoint := strings.TrimRight(e.baseURL, "/") + "/v1/chat/completions"
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return "", 0, fmt.Errorf("litellm: create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")
	if e.apiKey != "" {
		httpReq.Header.Set("Authorization", "Bearer "+e.apiKey)
	}

	start := time.Now()
	resp, err := e.httpClient.Do(httpReq)
	if err != nil {
		return "", 0, fmt.Errorf("litellm: request failed: %w", err)
	}
	defer resp.Body.Close() //nolint:errcheck

	if resp.StatusCode != http.StatusOK {
		// Best effort: if the body cannot be read the excerpt is simply omitted.
		raw, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrBody))
		if detail := strings.TrimSpace(string(raw)); detail != "" {
			return "", 0, fmt.Errorf("litellm: server returned status %d: %s", resp.StatusCode, detail)
		}
		return "", 0, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
	}

	var chatResp litellmResponse
	if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil {
		return "", 0, fmt.Errorf("litellm: decode response: %w", err)
	}
	// Stop the clock only after the body has been consumed; Do returns before
	// the response body has been read.
	durationMs := time.Since(start).Milliseconds()

	if len(chatResp.Choices) == 0 {
		return "", 0, fmt.Errorf("litellm: no choices in response")
	}
	return chatResp.Choices[0].Message.Content, durationMs, nil
}