// hyperguild/ingestion/cmd/server/main.go

// ingestion/cmd/server/main.go
package main

import (
	"context"
	"fmt"
	"log/slog"
	"net/http"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/mathiasbq/hyperguild/ingestion/internal/api"
	"github.com/mathiasbq/hyperguild/ingestion/internal/auth"
	"github.com/mathiasbq/hyperguild/ingestion/internal/llm"
	"github.com/mathiasbq/hyperguild/ingestion/internal/mcp"
	"github.com/mathiasbq/hyperguild/ingestion/internal/oauth"
	"github.com/mathiasbq/hyperguild/ingestion/internal/reranker"
	"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
	"github.com/mathiasbq/hyperguild/ingestion/internal/watcher"
)

// envOr looks up the environment variable named key and returns its value.
// When the variable is unset or set to the empty string, fallback is
// returned instead.
func envOr(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}

// envInt reads the environment variable named key and parses it as a
// base-10 integer with strconv.Atoi. The fallback is returned when the
// variable is unset, empty, or does not parse; parse errors are not reported.
func envInt(key string, fallback int) int {
	raw := os.Getenv(key)
	if raw == "" {
		return fallback
	}
	parsed, err := strconv.Atoi(raw)
	if err != nil {
		return fallback
	}
	return parsed
}

// main wires the ingestion server together from environment variables and
// serves it over HTTP until the listener returns an error.
//
// It builds the ingestion LLM client and pipeline config, the REST handler,
// the MCP server (optionally with an answer-LLM router and a reranker), the
// optional JWT validator and OAuth endpoints, and the optional brain-directory
// watcher. BRAIN_MCP_TOKEN is mandatory. Any configuration error is logged and
// the process exits with status 1.
func main() {
	logger := slog.New(slog.NewJSONHandler(os.Stdout, nil))

	brainDir := envOr("INGEST_BRAIN_DIR", "../brain")
	port := envOr("INGEST_PORT", "3300")

	// Ingestion LLM settings. INGEST_LLM_TIMEOUT is interpreted as minutes and
	// INGEST_WATCH_INTERVAL as seconds (see the conversions below).
	llmURL := envOr("INGEST_LLM_URL", "http://iguana:4000/v1")
	llmKey := os.Getenv("INGEST_LLM_KEY")
	llmModel := envOr("INGEST_LLM_MODEL", "koala/qwen35-9b-fast")
	llmTimeoutMins := envInt("INGEST_LLM_TIMEOUT", 15)
	chunkSize := envInt("INGEST_CHUNK_SIZE", 6000)
	watchInterval := envInt("INGEST_WATCH_INTERVAL", 30)

	llmClient := llm.New(llmURL, llmKey, llmModel, time.Duration(llmTimeoutMins)*time.Minute)

	pipelineCfg := pipeline.Config{
		Complete:  llmClient.Complete,
		ChunkSize: chunkSize,
	}

	h := api.NewHandler(brainDir, logger, pipelineCfg)

	// Optional answer LLM: configured only when BRAIN_LLM_PRIMARY_URL is set.
	// Otherwise answerComplete stays nil and is passed to the MCP server as-is.
	var answerComplete pipeline.CompleteFunc
	if primaryURL := os.Getenv("BRAIN_LLM_PRIMARY_URL"); primaryURL != "" {
		primaryModel := envOr("BRAIN_LLM_PRIMARY_MODEL", "gemma4:31b")
		primaryKey := os.Getenv("BERGET_API_KEY")
		timeoutMS := envInt("BRAIN_LLM_TIMEOUT_MS", 10000)
		timeout := time.Duration(timeoutMS) * time.Millisecond

		primary := llm.New(primaryURL, primaryKey, primaryModel, timeout)
		router := &llm.Router{Primary: primary}

		// The fallback client is created without an API key.
		if fallbackURL := os.Getenv("BRAIN_LLM_FALLBACK_URL"); fallbackURL != "" {
			fallbackModel := envOr("BRAIN_LLM_FALLBACK_MODEL", "gemma4:31b")
			router.Fallback = llm.New(fallbackURL, "", fallbackModel, timeout)
		}
		answerComplete = router.Complete
		logger.Info("brain answer LLM configured", "primary", primaryURL, "model", primaryModel)
	}

	mcpSrv := mcp.NewServer(brainDir, &pipelineCfg, llmClient.Complete, answerComplete)
	if rerankURL := os.Getenv("BRAIN_RERANKER_URL"); rerankURL != "" {
		rerankModel := envOr("BRAIN_RERANKER_MODEL", "dengcao/Qwen3-Reranker-0.6B:F16")
		mcpSrv = mcpSrv.WithReranker(reranker.New(rerankURL, rerankModel))
		logger.Info("brain reranker configured", "url", rerankURL, "model", rerankModel)
	}

	mcpToken := os.Getenv("BRAIN_MCP_TOKEN")
	if mcpToken == "" {
		logger.Error("BRAIN_MCP_TOKEN not set")
		os.Exit(1)
	}

	mux := http.NewServeMux()
	mux.HandleFunc("POST /query", h.Query)
	mux.HandleFunc("POST /write", h.Write)
	mux.HandleFunc("POST /index", h.Index)
	mux.HandleFunc("POST /ingest", h.Ingest)
	mux.HandleFunc("POST /ingest-path", h.IngestPath)
	mux.HandleFunc("POST /ingest-raw", h.IngestRaw)
	mux.HandleFunc("POST /backfill-refs", h.BackfillRefs)
	mux.HandleFunc("GET /pass-rate", h.PassRate)

	// MCP_RESOURCE_URL is shared by the Dex protected-resource metadata and
	// the client_credentials issuer below, so read it once.
	resourceURL := os.Getenv("MCP_RESOURCE_URL")

	// Dex/JWT support is enabled only when DEX_ISSUER_URL is set.
	// Resource-metadata URL is only emitted on 401 when Dex OAuth is
	// configured. Static-Bearer-only deployments leave this empty so
	// clients never see an OAuth challenge.
	var jwtValidator *auth.Validator
	var resourceMetadataURL string
	if dexURL := os.Getenv("DEX_ISSUER_URL"); dexURL != "" {
		v, err := auth.NewValidator(dexURL, os.Getenv("MCP_AUDIENCE"))
		if err != nil {
			logger.Error("build jwt validator", "err", err)
			os.Exit(1)
		}
		jwtValidator = v
		logger.Info("jwt auth enabled", "issuer", dexURL)

		mux.HandleFunc("GET /.well-known/oauth-protected-resource",
			auth.ProtectedResourceHandler(resourceURL, dexURL))
		if resourceURL != "" {
			resourceMetadataURL = strings.TrimRight(resourceURL, "/") + "/.well-known/oauth-protected-resource"
		}
	}

	mux.Handle("/mcp", mcp.BearerAuth(mcpToken, jwtValidator, resourceMetadataURL, mcpSrv))

	// Opt-in OAuth 2.0 client_credentials flow for claude.ai's custom-MCP
	// integration UI, which has no static-Bearer field. Setting both
	// OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET enables the token exchange;
	// setting only one is misconfiguration → fail fast.
	oauthID := os.Getenv("OAUTH_CLIENT_ID")
	oauthSecret := os.Getenv("OAUTH_CLIENT_SECRET")
	switch {
	case oauthID != "" && oauthSecret != "":
		issuer := resourceURL
		if issuer == "" {
			logger.Error("OAUTH_CLIENT_ID/SECRET set but MCP_RESOURCE_URL is empty; cannot derive issuer")
			os.Exit(1)
		}
		mux.HandleFunc("GET /.well-known/oauth-authorization-server",
			oauth.MetadataHandler(issuer))
		// The token endpoint hands out the static MCP bearer token.
		mux.HandleFunc("POST /oauth/token", oauth.TokenHandler(oauth.TokenConfig{
			ClientID:     oauthID,
			ClientSecret: oauthSecret,
			AccessToken:  mcpToken,
		}))
		logger.Info("oauth client_credentials enabled", "issuer", strings.TrimRight(issuer, "/"))
	case oauthID == "" && oauthSecret == "":
		// disabled — that's fine
	default:
		logger.Error("OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET must be set together")
		os.Exit(1)
	}

	// Start the watcher only after every fail-fast configuration check has
	// passed, so a misconfigured process exits before any watcher work is
	// kicked off. A non-positive interval disables the watcher.
	ctx := context.Background()
	if watchInterval > 0 {
		watcher.Start(ctx, watcher.Config{
			BrainDir: brainDir,
			Interval: time.Duration(watchInterval) * time.Second,
			Pipeline: pipelineCfg,
		})
	}

	addr := ":" + port
	watchIntervalLog := "disabled"
	if watchInterval > 0 {
		watchIntervalLog = fmt.Sprintf("%ds", watchInterval)
	}
	logger.Info("ingestion server starting",
		"addr", addr,
		"brain_dir", brainDir,
		"llm_url", llmURL,
		"llm_model", llmModel,
		"chunk_size", chunkSize,
		"watch_interval", watchIntervalLog,
		"mcp_enabled", true,
	)

	// Use an explicit http.Server so slow or idle connections are bounded.
	// ReadTimeout/WriteTimeout are intentionally left unset: handlers may
	// wait on LLM calls whose timeout is configured in minutes above.
	srv := &http.Server{
		Addr:              addr,
		Handler:           mux,
		ReadHeaderTimeout: 10 * time.Second,
		IdleTimeout:       2 * time.Minute,
	}
	if err := srv.ListenAndServe(); err != nil {
		logger.Error("server stopped", "err", err)
		os.Exit(1)
	}
}