120 lines
2.7 KiB
Go
120 lines
2.7 KiB
Go
package llm
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// Client calls an OpenAI-compatible chat completions endpoint.
|
|
type Client struct {
|
|
baseURL string
|
|
apiKey string
|
|
model string
|
|
httpClient *http.Client
|
|
}
|
|
|
|
// New constructs a Client.
|
|
func New(baseURL, apiKey, model string, timeout time.Duration) *Client {
|
|
return &Client{
|
|
baseURL: strings.TrimRight(baseURL, "/"),
|
|
apiKey: apiKey,
|
|
model: model,
|
|
httpClient: &http.Client{Timeout: timeout},
|
|
}
|
|
}
|
|
|
|
type chatRequest struct {
|
|
Model string `json:"model"`
|
|
Messages []message `json:"messages"`
|
|
Temperature float64 `json:"temperature"`
|
|
}
|
|
|
|
type message struct {
|
|
Role string `json:"role"`
|
|
Content string `json:"content"`
|
|
}
|
|
|
|
type chatResponse struct {
|
|
Choices []struct {
|
|
Message message `json:"message"`
|
|
} `json:"choices"`
|
|
}
|
|
|
|
// Complete sends a system + user message and returns the assistant's reply.
|
|
// Retries once on HTTP 429 using Retry-After header or 5s backoff.
|
|
func (c *Client) Complete(ctx context.Context, system, user string) (string, error) {
|
|
body := chatRequest{
|
|
Model: c.model,
|
|
Messages: []message{
|
|
{Role: "system", Content: system},
|
|
{Role: "user", Content: user},
|
|
},
|
|
Temperature: 0.2,
|
|
}
|
|
b, err := json.Marshal(body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("marshal request: %w", err)
|
|
}
|
|
|
|
do := func() (*http.Response, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/chat/completions", bytes.NewReader(b))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("build request: %w", err)
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
if c.apiKey != "" {
|
|
req.Header.Set("Authorization", "Bearer "+c.apiKey)
|
|
}
|
|
return c.httpClient.Do(req)
|
|
}
|
|
|
|
resp, err := do()
|
|
if err != nil {
|
|
return "", fmt.Errorf("call LLM: %w", err)
|
|
}
|
|
|
|
if resp.StatusCode == http.StatusTooManyRequests {
|
|
resp.Body.Close()
|
|
wait := 5 * time.Second
|
|
if ra := resp.Header.Get("Retry-After"); ra != "" {
|
|
if secs, err := strconv.Atoi(ra); err == nil {
|
|
wait = time.Duration(secs) * time.Second
|
|
}
|
|
}
|
|
select {
|
|
case <-ctx.Done():
|
|
return "", ctx.Err()
|
|
case <-time.After(wait):
|
|
}
|
|
resp, err = do()
|
|
if err != nil {
|
|
return "", fmt.Errorf("retry LLM call: %w", err)
|
|
}
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
out, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("read response: %w", err)
|
|
}
|
|
if resp.StatusCode != http.StatusOK {
|
|
return "", fmt.Errorf("LLM returned %d: %s", resp.StatusCode, out)
|
|
}
|
|
|
|
var cr chatResponse
|
|
if err := json.Unmarshal(out, &cr); err != nil {
|
|
return "", fmt.Errorf("parse response: %w", err)
|
|
}
|
|
if len(cr.Choices) == 0 {
|
|
return "", fmt.Errorf("LLM returned no choices")
|
|
}
|
|
return cr.Choices[0].Message.Content, nil
|
|
}
|