From 8c87460bffbbbb8def84edbe00c2e8e11068f41d Mon Sep 17 00:00:00 2001
From: Mathias Bergqvist
Date: Fri, 1 May 2026 13:02:02 +0200
Subject: [PATCH] feat(ingestion): implement brain_ingest MCP tool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wraps pipeline.Run with the existing LLM client. Mirrors the HTTP /ingest
and /ingest-path semantics — accepts either path or content+source,
validates mutual exclusion, surfaces an explicit error when the LLM client
is not configured (test-mode). ctx is threaded through to pipeline.Run for
cancellation.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 ingestion/internal/mcp/handlers.go      | 82 +++++++++++++++++++++++++
 ingestion/internal/mcp/handlers_test.go | 45 ++++++++++++++
 ingestion/internal/mcp/server.go        |  2 +
 3 files changed, 129 insertions(+)

diff --git a/ingestion/internal/mcp/handlers.go b/ingestion/internal/mcp/handlers.go
index 14ca3ea..4f07977 100644
--- a/ingestion/internal/mcp/handlers.go
+++ b/ingestion/internal/mcp/handlers.go
@@ -4,8 +4,11 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"path/filepath"
+	"strings"
 
 	"github.com/mathiasbq/hyperguild/ingestion/internal/api"
+	"github.com/mathiasbq/hyperguild/ingestion/internal/extract"
 	"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
 	"github.com/mathiasbq/hyperguild/ingestion/internal/search"
 )
@@ -155,3 +158,82 @@ func (s *Server) brainIngestRaw(ctx context.Context, args json.RawMessage) (json
 	}
 	return json.Marshal(map[string]any{"pages": pages, "warnings": warnings})
 }
+
+// brainIngestArgs is the argument payload of the brain_ingest tool. Exactly
+// one of Path or Content must be set; Source is required alongside Content
+// and optional alongside Path.
+type brainIngestArgs struct {
+	Content string `json:"content,omitempty"`
+	Source  string `json:"source,omitempty"`
+	Path    string `json:"path,omitempty"`
+	DryRun  bool   `json:"dry_run,omitempty"`
+}
+
+// brainIngest handles the brain_ingest tool call. It ingests either the text
+// extracted from the file at path or the inline content, and returns the JSON
+// built by runIngest. It returns an error when args cannot be parsed or are
+// inconsistent, when s.pipeline.Complete is nil (LLM not configured), or when
+// extraction or the pipeline run fails.
+func (s *Server) brainIngest(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
+	var a brainIngestArgs
+	if err := json.Unmarshal(args, &a); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	// Validate every argument before looking at the LLM configuration, so a
+	// malformed call is reported as such even when no LLM is configured.
+	switch {
+	case a.Path != "" && a.Content != "":
+		return nil, fmt.Errorf("path and content+source are mutually exclusive")
+	case a.Path == "" && a.Content == "":
+		return nil, fmt.Errorf("either path or content+source is required")
+	case a.Content != "" && a.Source == "":
+		return nil, fmt.Errorf("source is required when content is provided")
+	}
+	if s.pipeline.Complete == nil {
+		return nil, fmt.Errorf("LLM not configured: set INGEST_LLM_URL")
+	}
+
+	if a.Path == "" {
+		return s.runIngest(ctx, a.Content, a.Source, a.DryRun)
+	}
+	text, err := extract.Text(a.Path)
+	if err != nil {
+		return nil, fmt.Errorf("extract: %w", err)
+	}
+	source := a.Source
+	if source == "" {
+		source = defaultSource(a.Path)
+	}
+	return s.runIngest(ctx, text, source, a.DryRun)
+}
+
+// defaultSource derives a source name from path: its final element with the
+// extension removed. The extension is taken from the final element only, so a
+// name that is all extension (for example ".env") is returned unchanged
+// instead of collapsing to "." or to the parent directory's name.
+func defaultSource(path string) string {
+	base := filepath.Base(path)
+	if stem := strings.TrimSuffix(base, filepath.Ext(base)); stem != "" {
+		return stem
+	}
+	return base
+}
+
+// runIngest runs the ingestion pipeline on content and marshals the resulting
+// page and warning lists to JSON. Nil lists are replaced with empty slices so
+// they encode as [] rather than null.
+func (s *Server) runIngest(ctx context.Context, content, source string, dryRun bool) (json.RawMessage, error) {
+	result, err := pipeline.Run(ctx, s.pipeline, s.brainDir, content, source, dryRun)
+	if err != nil {
+		return nil, fmt.Errorf("ingest: %w", err)
+	}
+	pages := result.Pages
+	if pages == nil {
+		pages = []string{}
+	}
+	warnings := result.Warnings
+	if warnings == nil {
+		warnings = []string{}
+	}
+	return json.Marshal(map[string]any{"pages": pages, "warnings": warnings})
+}
diff --git a/ingestion/internal/mcp/handlers_test.go b/ingestion/internal/mcp/handlers_test.go
index 4fe352b..e248ac1 100644
--- a/ingestion/internal/mcp/handlers_test.go
+++ b/ingestion/internal/mcp/handlers_test.go
@@ -127,3 +127,48 @@ func TestBrainIngestRawDryRun(t *testing.T) {
 	_, err := os.Stat(filepath.Join(brainDir, "wiki", "concepts", "test-concept.md"))
 	assert.True(t, os.IsNotExist(err))
 }
+
+func TestBrainIngestRejectsBoth(t *testing.T) {
+	brainDir := t.TempDir()
+	srv := mcp.NewServer(brainDir, nil, nil)
+
+	resp := toolCall(t, srv, "brain_ingest", map[string]any{
+		"content": "x",
+		"source":  "y",
+		"path":    "/tmp/foo.md",
+	})
+	require.NotNil(t, resp["error"])
+}
+
+func TestBrainIngestRequiresOne(t *testing.T) {
+	brainDir := t.TempDir()
+	srv := mcp.NewServer(brainDir, nil, nil)
+
+	resp := toolCall(t, srv, "brain_ingest", map[string]any{})
+	require.NotNil(t, resp["error"])
+}
+
+func TestBrainIngestRejectsContentWithoutSource(t *testing.T) {
+	brainDir := t.TempDir()
+	srv := mcp.NewServer(brainDir, nil, nil)
+
+	resp := toolCall(t, srv, "brain_ingest", map[string]any{
+		"content": "x",
+	})
+	require.NotNil(t, resp["error"])
+	errObj := resp["error"].(map[string]any)
+	assert.Contains(t, errObj["message"].(string), "source is required")
+}
+
+func TestBrainIngestRequiresLLMConfigured(t *testing.T) {
+	brainDir := t.TempDir()
+	srv := mcp.NewServer(brainDir, nil, nil) // nil pipelineCfg → no LLM
+
+	resp := toolCall(t, srv, "brain_ingest", map[string]any{
+		"content": "some content",
+		"source":  "test",
+	})
+	require.NotNil(t, resp["error"])
+	errObj := resp["error"].(map[string]any)
+	assert.Contains(t, errObj["message"].(string), "LLM not configured")
+}
diff --git a/ingestion/internal/mcp/server.go b/ingestion/internal/mcp/server.go
index 8e631c9..9069e82 100644
--- a/ingestion/internal/mcp/server.go
+++ b/ingestion/internal/mcp/server.go
@@ -122,6 +122,8 @@ func (s *Server) handleCall(ctx context.Context, name string, args json.RawMessa
 		return s.brainWrite(ctx, args)
 	case "brain_ingest_raw":
 		return s.brainIngestRaw(ctx, args)
+	case "brain_ingest":
+		return s.brainIngest(ctx, args)
 	default:
 		return nil, fmt.Errorf("unknown tool: %s", name)
 	}