// ingestion/internal/watcher/watcher_test.go package watcher import ( "context" "encoding/json" "fmt" "os" "path/filepath" "testing" "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/mathiasbq/hyperguild/ingestion/internal/pipeline" "github.com/mathiasbq/hyperguild/ingestion/internal/wiki" ) // successComplete returns a valid JSON-encoded page array for any call. func successComplete(page wiki.Page) pipeline.CompleteFunc { return func(ctx context.Context, system, user string) (string, error) { b, err := json.Marshal([]wiki.Page{page}) if err != nil { return "", err } return string(b), nil } } // errorComplete always returns an error simulating an LLM failure. func errorComplete(_ context.Context, _, _ string) (string, error) { return "", fmt.Errorf("LLM unavailable") } func setupBrainDir(t *testing.T) string { t.Helper() brainDir := t.TempDir() for _, sub := range []string{"wiki/concepts", "wiki/entities", "wiki/sources", "raw"} { require.NoError(t, os.MkdirAll(filepath.Join(brainDir, sub), 0o755)) } return brainDir } func TestStart_ProcessesFile(t *testing.T) { brainDir := setupBrainDir(t) // Place a .md file in raw/. rawFile := filepath.Join(brainDir, "raw", "shape-up-book.md") require.NoError(t, os.WriteFile(rawFile, []byte("Content about Shape Up."), 0o644)) date := time.Now().UTC().Format("2006-01-02") wikiPage := wiki.Page{ Path: "wiki/sources/shape-up-book.md", Content: "---\ntitle: Shape Up Book\ntype: article\ndomain: product-management\ndate_ingested: " + date + "\nlast_updated: " + date + "\naliases:\n - Shape Up Book\n---\n\n## Summary\n\nA book about Shape Up.\n", } cfg := Config{ BrainDir: brainDir, Interval: 50 * time.Millisecond, Pipeline: pipeline.Config{ Complete: successComplete(wikiPage), ChunkSize: 0, Schema: "# Schema\nThree page types.", }, } ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) defer cancel() Start(ctx, cfg) // Poll until the file is moved to processed/. processedPath := filepath.Join(brainDir, "raw", "processed", date, "shape-up-book.md") var found bool deadline := time.Now().Add(2 * time.Second) for time.Now().Before(deadline) { if _, err := os.Stat(processedPath); err == nil { found = true break } time.Sleep(20 * time.Millisecond) } require.True(t, found, "file should be copied to processed/") // Original file should still exist (copy, not move — keeps Obsidian vault intact). _, err := os.Stat(rawFile) assert.NoError(t, err, "original file should remain in raw/") // A .processed marker should exist next to the original. _, err = os.Stat(rawFile + ".processed") assert.NoError(t, err, ".processed marker should be written") // Wiki page should exist. wikiPath := filepath.Join(brainDir, "wiki", "sources", "shape-up-book.md") _, err = os.Stat(wikiPath) assert.NoError(t, err, "wiki page should be written") // log.md should contain an ingest record. logContent, err := os.ReadFile(filepath.Join(brainDir, "log.md")) require.NoError(t, err) assert.Contains(t, string(logContent), "— ingest") } func TestStart_MovesToFailedOnError(t *testing.T) { brainDir := setupBrainDir(t) rawFile := filepath.Join(brainDir, "raw", "bad-file.md") require.NoError(t, os.WriteFile(rawFile, []byte("Some content."), 0o644)) cfg := Config{ BrainDir: brainDir, Interval: 50 * time.Millisecond, Pipeline: pipeline.Config{ Complete: errorComplete, ChunkSize: 0, Schema: "# Schema\nThree page types.", }, } ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) defer cancel() Start(ctx, cfg) // Poll until the file is moved to failed/. failedPath := filepath.Join(brainDir, "raw", "failed", "bad-file.md") var found bool deadline := time.Now().Add(2 * time.Second) for time.Now().Before(deadline) { if _, err := os.Stat(failedPath); err == nil { found = true break } time.Sleep(20 * time.Millisecond) } require.True(t, found, "file should be copied to failed/") // Original file should still exist (copy, not move — keeps Obsidian vault intact). _, err := os.Stat(rawFile) assert.NoError(t, err, "original file should remain in raw/") // A .failed marker should exist next to the original. _, err = os.Stat(rawFile + ".failed") assert.NoError(t, err, ".failed marker should be written") // log.md should contain a watcher error entry. logContent, err := os.ReadFile(filepath.Join(brainDir, "log.md")) require.NoError(t, err) assert.Contains(t, string(logContent), "— watcher error") assert.Contains(t, string(logContent), "bad-file.md") } func TestDeriveSource(t *testing.T) { tests := []struct { filename string want string }{ {"shape-up-book.md", "Shape Up Book"}, {"raft-consensus.txt", "Raft Consensus"}, {"my-note.md", "My Note"}, {"single.md", "Single"}, {"no-extension", "No Extension"}, } for _, tc := range tests { t.Run(tc.filename, func(t *testing.T) { got := deriveSource(tc.filename) assert.Equal(t, tc.want, got) }) } } func TestProcessDir_SkipsSubdirs(t *testing.T) { brainDir := setupBrainDir(t) // Create processed/ and failed/ subdirs with files inside. for _, sub := range []string{"processed/2026-04-22", "failed"} { require.NoError(t, os.MkdirAll(filepath.Join(brainDir, "raw", sub), 0o755)) } processedFile := filepath.Join(brainDir, "raw", "processed", "2026-04-22", "old-file.md") failedFile := filepath.Join(brainDir, "raw", "failed", "broken-file.md") require.NoError(t, os.WriteFile(processedFile, []byte("old"), 0o644)) require.NoError(t, os.WriteFile(failedFile, []byte("broken"), 0o644)) // Also place a valid file in raw/ root that should be processed. validFile := filepath.Join(brainDir, "raw", "valid.md") require.NoError(t, os.WriteFile(validFile, []byte("valid content"), 0o644)) date := time.Now().UTC().Format("2006-01-02") // Track which sources were passed to Complete. var processedSources []string completeFn := func(ctx context.Context, system, user string) (string, error) { // Record that this was called; return a minimal valid page. page := wiki.Page{ Path: "wiki/sources/valid.md", Content: "---\ntitle: Valid\n---\n\n## Summary\n\nValid.\n", } b, _ := json.Marshal([]wiki.Page{page}) processedSources = append(processedSources, "called") return string(b), nil } cfg := Config{ BrainDir: brainDir, Interval: time.Hour, // not used; we call processDir directly Pipeline: pipeline.Config{ Complete: completeFn, ChunkSize: 0, Schema: "# Schema\nThree page types.", }, } errs := processDir(context.Background(), cfg, date) assert.Empty(t, errs, "no errors expected") // Complete should have been called exactly once (for valid.md, not for files in subdirs). assert.Len(t, processedSources, 1, "only the file in raw/ root should be processed") // Files in processed/ and failed/ must remain untouched. _, err := os.Stat(processedFile) assert.NoError(t, err, "processed subdir file should be untouched") _, err = os.Stat(failedFile) assert.NoError(t, err, "failed subdir file should be untouched") }