Adds an opt-in cross-encoder rerank step between BM25 retrieval and LLM synthesis. With BRAIN_RERANKER_URL set, brain_answer retrieves BM25 top-20, scores each excerpt against the query via Qwen3-Reranker on Ollama, drops the "no" answers, and forwards up to 5 surviving sources to the LLM. Unset, behaviour is unchanged (BM25 top-10 → LLM).

The reranker is a *filter*, not a re-ranker: Qwen3-Reranker emits a binary yes/no token under its native chat template, and ties within the "yes" set are broken by BM25 rank — what got retrieved first stays ahead.

New package ingestion/internal/reranker:
- Client with URL, Model, HTTP fields.
- New(url, model) returns nil on empty url so callers can treat "feature disabled" as a single nil check.
- Score(ctx, query, docs) issues one /api/generate call per doc using the Qwen3-Reranker yes/no chat template (verbatim, because the model was trained on this exact wording). Parses the first non-think token.

Wiring:
- mcp.Server gains a WithReranker fluent setter to keep NewServer signature stable.
- brain_answer's BM25 limit jumps to 20 only when a reranker is wired, to give the filter something to do.
- cmd/server/main.go reads BRAIN_RERANKER_URL (+ optional BRAIN_RERANKER_MODEL, default dengcao/Qwen3-Reranker-0.6B:F16).

Tests cover: nil-on-empty-url, ordered yes/no scoring, request shape (model, prompt contents, yes/no template), ambiguous response → 0, empty doc slice, upstream-error propagation, plus an end-to-end brain_answer integration that proves only the relevant note reaches the LLM when noise.md is rejected.

Closes hyperguild#7.
185 lines
6.0 KiB
Go
185 lines
6.0 KiB
Go
// ingestion/cmd/server/main.go
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"net/http"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/api"
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/auth"
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/llm"
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/mcp"
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/oauth"
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/reranker"
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/watcher"
|
|
)
|
|
|
|
// envOr returns the value of the environment variable key. When the variable
// is unset or set to the empty string, fallback is returned instead.
func envOr(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}
|
|
|
|
// envInt returns the environment variable key parsed as a base-10 int.
// fallback is returned when the variable is unset, empty, or does not parse
// as an integer; a malformed value is not reported.
func envInt(key string, fallback int) int {
	raw := os.Getenv(key)
	if raw == "" {
		return fallback
	}
	parsed, err := strconv.Atoi(raw)
	if err != nil {
		return fallback
	}
	return parsed
}
|
|
|
|
func main() {
|
|
logger := slog.New(slog.NewJSONHandler(os.Stdout, nil))
|
|
|
|
brainDir := envOr("INGEST_BRAIN_DIR", "../brain")
|
|
port := envOr("INGEST_PORT", "3300")
|
|
|
|
llmURL := envOr("INGEST_LLM_URL", "http://iguana:4000/v1")
|
|
llmKey := os.Getenv("INGEST_LLM_KEY")
|
|
llmModel := envOr("INGEST_LLM_MODEL", "koala/qwen35-9b-fast")
|
|
llmTimeoutMins := envInt("INGEST_LLM_TIMEOUT", 15)
|
|
chunkSize := envInt("INGEST_CHUNK_SIZE", 6000)
|
|
watchInterval := envInt("INGEST_WATCH_INTERVAL", 30)
|
|
|
|
llmClient := llm.New(llmURL, llmKey, llmModel, time.Duration(llmTimeoutMins)*time.Minute)
|
|
|
|
pipelineCfg := pipeline.Config{
|
|
Complete: llmClient.Complete,
|
|
ChunkSize: chunkSize,
|
|
}
|
|
|
|
h := api.NewHandler(brainDir, logger, pipelineCfg)
|
|
|
|
var answerComplete pipeline.CompleteFunc
|
|
if primaryURL := os.Getenv("BRAIN_LLM_PRIMARY_URL"); primaryURL != "" {
|
|
primaryModel := envOr("BRAIN_LLM_PRIMARY_MODEL", "gemma4:31b")
|
|
primaryKey := os.Getenv("BERGET_API_KEY")
|
|
timeoutMS := envInt("BRAIN_LLM_TIMEOUT_MS", 10000)
|
|
timeout := time.Duration(timeoutMS) * time.Millisecond
|
|
|
|
primary := llm.New(primaryURL, primaryKey, primaryModel, timeout)
|
|
router := &llm.Router{Primary: primary}
|
|
|
|
if fallbackURL := os.Getenv("BRAIN_LLM_FALLBACK_URL"); fallbackURL != "" {
|
|
fallbackModel := envOr("BRAIN_LLM_FALLBACK_MODEL", "gemma4:31b")
|
|
router.Fallback = llm.New(fallbackURL, "", fallbackModel, timeout)
|
|
}
|
|
answerComplete = router.Complete
|
|
logger.Info("brain answer LLM configured", "primary", primaryURL, "model", primaryModel)
|
|
}
|
|
|
|
mcpSrv := mcp.NewServer(brainDir, &pipelineCfg, llmClient.Complete, answerComplete)
|
|
if rerankURL := os.Getenv("BRAIN_RERANKER_URL"); rerankURL != "" {
|
|
rerankModel := envOr("BRAIN_RERANKER_MODEL", "dengcao/Qwen3-Reranker-0.6B:F16")
|
|
mcpSrv = mcpSrv.WithReranker(reranker.New(rerankURL, rerankModel))
|
|
logger.Info("brain reranker configured", "url", rerankURL, "model", rerankModel)
|
|
}
|
|
|
|
mcpToken := os.Getenv("BRAIN_MCP_TOKEN")
|
|
if mcpToken == "" {
|
|
logger.Error("BRAIN_MCP_TOKEN not set")
|
|
os.Exit(1)
|
|
}
|
|
|
|
ctx := context.Background()
|
|
if watchInterval > 0 {
|
|
watcher.Start(ctx, watcher.Config{
|
|
BrainDir: brainDir,
|
|
Interval: time.Duration(watchInterval) * time.Second,
|
|
Pipeline: pipelineCfg,
|
|
})
|
|
}
|
|
|
|
mux := http.NewServeMux()
|
|
mux.HandleFunc("POST /query", h.Query)
|
|
mux.HandleFunc("POST /write", h.Write)
|
|
mux.HandleFunc("POST /index", h.Index)
|
|
mux.HandleFunc("POST /ingest", h.Ingest)
|
|
mux.HandleFunc("POST /ingest-path", h.IngestPath)
|
|
mux.HandleFunc("POST /ingest-raw", h.IngestRaw)
|
|
mux.HandleFunc("POST /backfill-refs", h.BackfillRefs)
|
|
mux.HandleFunc("GET /pass-rate", h.PassRate)
|
|
var jwtValidator *auth.Validator
|
|
if dexURL := os.Getenv("DEX_ISSUER_URL"); dexURL != "" {
|
|
audience := os.Getenv("MCP_AUDIENCE")
|
|
v, err := auth.NewValidator(dexURL, audience)
|
|
if err != nil {
|
|
logger.Error("build jwt validator", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
jwtValidator = v
|
|
logger.Info("jwt auth enabled", "issuer", dexURL)
|
|
}
|
|
|
|
// Resource-metadata URL is only emitted on 401 when Dex OAuth is
|
|
// configured. Static-Bearer-only deployments leave this empty so
|
|
// clients never see an OAuth challenge.
|
|
var resourceMetadataURL string
|
|
if dexURL := os.Getenv("DEX_ISSUER_URL"); dexURL != "" {
|
|
resourceURL := os.Getenv("MCP_RESOURCE_URL")
|
|
mux.HandleFunc("GET /.well-known/oauth-protected-resource",
|
|
auth.ProtectedResourceHandler(resourceURL, dexURL))
|
|
if resourceURL != "" {
|
|
resourceMetadataURL = strings.TrimRight(resourceURL, "/") + "/.well-known/oauth-protected-resource"
|
|
}
|
|
}
|
|
|
|
mux.Handle("/mcp", mcp.BearerAuth(mcpToken, jwtValidator, resourceMetadataURL, mcpSrv))
|
|
|
|
// Opt-in OAuth 2.0 client_credentials flow for claude.ai's custom-MCP
|
|
// integration UI, which has no static-Bearer field. Setting both
|
|
// OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET enables the token exchange;
|
|
// setting only one is misconfiguration → fail fast.
|
|
oauthID := os.Getenv("OAUTH_CLIENT_ID")
|
|
oauthSecret := os.Getenv("OAUTH_CLIENT_SECRET")
|
|
switch {
|
|
case oauthID != "" && oauthSecret != "":
|
|
issuer := os.Getenv("MCP_RESOURCE_URL")
|
|
if issuer == "" {
|
|
logger.Error("OAUTH_CLIENT_ID/SECRET set but MCP_RESOURCE_URL is empty; cannot derive issuer")
|
|
os.Exit(1)
|
|
}
|
|
mux.HandleFunc("GET /.well-known/oauth-authorization-server",
|
|
oauth.MetadataHandler(issuer))
|
|
mux.HandleFunc("POST /oauth/token", oauth.TokenHandler(oauth.TokenConfig{
|
|
ClientID: oauthID,
|
|
ClientSecret: oauthSecret,
|
|
AccessToken: mcpToken,
|
|
}))
|
|
logger.Info("oauth client_credentials enabled", "issuer", strings.TrimRight(issuer, "/"))
|
|
case oauthID == "" && oauthSecret == "":
|
|
// disabled — that's fine
|
|
default:
|
|
logger.Error("OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET must be set together")
|
|
os.Exit(1)
|
|
}
|
|
|
|
addr := ":" + port
|
|
watchIntervalLog := "disabled"
|
|
if watchInterval > 0 {
|
|
watchIntervalLog = fmt.Sprintf("%ds", watchInterval)
|
|
}
|
|
logger.Info("ingestion server starting",
|
|
"addr", addr,
|
|
"brain_dir", brainDir,
|
|
"llm_url", llmURL,
|
|
"llm_model", llmModel,
|
|
"chunk_size", chunkSize,
|
|
"watch_interval", watchIntervalLog,
|
|
"mcp_enabled", true,
|
|
)
|
|
if err := http.ListenAndServe(addr, mux); err != nil {
|
|
logger.Error("server stopped", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
}
|