Polls brain/raw/ on a configurable ticker, derives human-readable source names from filenames, runs the pipeline, and moves files to processed/YYYY-MM-DD/ on success or failed/ on error with a log.md entry. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
220 lines
6.7 KiB
Go
220 lines
6.7 KiB
Go
// ingestion/internal/watcher/watcher_test.go
|
|
package watcher
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
|
)
|
|
|
|
// successComplete returns a valid JSON-encoded page array for any call.
|
|
func successComplete(page wiki.Page) pipeline.CompleteFunc {
|
|
return func(ctx context.Context, system, user string) (string, error) {
|
|
b, err := json.Marshal([]wiki.Page{page})
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return string(b), nil
|
|
}
|
|
}
|
|
|
|
// errorComplete is a completion stub that stands in for a failing LLM: it
// disregards every argument and reports an error with an empty response.
func errorComplete(context.Context, string, string) (string, error) {
	failure := fmt.Errorf("LLM unavailable")
	return "", failure
}
|
|
|
|
func setupBrainDir(t *testing.T) string {
|
|
t.Helper()
|
|
brainDir := t.TempDir()
|
|
for _, sub := range []string{"wiki/concepts", "wiki/entities", "wiki/sources", "raw"} {
|
|
require.NoError(t, os.MkdirAll(filepath.Join(brainDir, sub), 0o755))
|
|
}
|
|
return brainDir
|
|
}
|
|
|
|
func TestStart_ProcessesFile(t *testing.T) {
|
|
brainDir := setupBrainDir(t)
|
|
|
|
// Place a .md file in raw/.
|
|
rawFile := filepath.Join(brainDir, "raw", "shape-up-book.md")
|
|
require.NoError(t, os.WriteFile(rawFile, []byte("Content about Shape Up."), 0o644))
|
|
|
|
date := time.Now().UTC().Format("2006-01-02")
|
|
wikiPage := wiki.Page{
|
|
Path: "wiki/sources/shape-up-book.md",
|
|
Content: "---\ntitle: Shape Up Book\ntype: article\ndomain: product-management\ndate_ingested: " + date + "\nlast_updated: " + date + "\naliases:\n - Shape Up Book\n---\n\n## Summary\n\nA book about Shape Up.\n",
|
|
}
|
|
|
|
cfg := Config{
|
|
BrainDir: brainDir,
|
|
Interval: 50 * time.Millisecond,
|
|
Pipeline: pipeline.Config{
|
|
Complete: successComplete(wikiPage),
|
|
ChunkSize: 0,
|
|
Schema: "# Schema\nThree page types.",
|
|
},
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
|
defer cancel()
|
|
|
|
Start(ctx, cfg)
|
|
|
|
// Poll until the file is moved to processed/.
|
|
processedPath := filepath.Join(brainDir, "raw", "processed", date, "shape-up-book.md")
|
|
var found bool
|
|
deadline := time.Now().Add(2 * time.Second)
|
|
for time.Now().Before(deadline) {
|
|
if _, err := os.Stat(processedPath); err == nil {
|
|
found = true
|
|
break
|
|
}
|
|
time.Sleep(20 * time.Millisecond)
|
|
}
|
|
require.True(t, found, "file should be moved to processed/")
|
|
|
|
// Original file should be gone.
|
|
_, err := os.Stat(rawFile)
|
|
assert.True(t, os.IsNotExist(err), "original file should be gone from raw/")
|
|
|
|
// Wiki page should exist.
|
|
wikiPath := filepath.Join(brainDir, "wiki", "sources", "shape-up-book.md")
|
|
_, err = os.Stat(wikiPath)
|
|
assert.NoError(t, err, "wiki page should be written")
|
|
|
|
// log.md should contain an ingest record.
|
|
logContent, err := os.ReadFile(filepath.Join(brainDir, "log.md"))
|
|
require.NoError(t, err)
|
|
assert.Contains(t, string(logContent), "— ingest")
|
|
}
|
|
|
|
func TestStart_MovesToFailedOnError(t *testing.T) {
|
|
brainDir := setupBrainDir(t)
|
|
|
|
rawFile := filepath.Join(brainDir, "raw", "bad-file.md")
|
|
require.NoError(t, os.WriteFile(rawFile, []byte("Some content."), 0o644))
|
|
|
|
cfg := Config{
|
|
BrainDir: brainDir,
|
|
Interval: 50 * time.Millisecond,
|
|
Pipeline: pipeline.Config{
|
|
Complete: errorComplete,
|
|
ChunkSize: 0,
|
|
Schema: "# Schema\nThree page types.",
|
|
},
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
|
defer cancel()
|
|
|
|
Start(ctx, cfg)
|
|
|
|
// Poll until the file is moved to failed/.
|
|
failedPath := filepath.Join(brainDir, "raw", "failed", "bad-file.md")
|
|
var found bool
|
|
deadline := time.Now().Add(2 * time.Second)
|
|
for time.Now().Before(deadline) {
|
|
if _, err := os.Stat(failedPath); err == nil {
|
|
found = true
|
|
break
|
|
}
|
|
time.Sleep(20 * time.Millisecond)
|
|
}
|
|
require.True(t, found, "file should be moved to failed/")
|
|
|
|
// Original file should be gone from raw/.
|
|
_, err := os.Stat(rawFile)
|
|
assert.True(t, os.IsNotExist(err), "original file should be gone from raw/")
|
|
|
|
// log.md should contain a watcher error entry.
|
|
logContent, err := os.ReadFile(filepath.Join(brainDir, "log.md"))
|
|
require.NoError(t, err)
|
|
assert.Contains(t, string(logContent), "— watcher error")
|
|
assert.Contains(t, string(logContent), "bad-file.md")
|
|
}
|
|
|
|
func TestDeriveSource(t *testing.T) {
|
|
tests := []struct {
|
|
filename string
|
|
want string
|
|
}{
|
|
{"shape-up-book.md", "Shape Up Book"},
|
|
{"raft-consensus.txt", "Raft Consensus"},
|
|
{"my-note.md", "My Note"},
|
|
{"single.md", "Single"},
|
|
{"no-extension", "No Extension"},
|
|
}
|
|
|
|
for _, tc := range tests {
|
|
t.Run(tc.filename, func(t *testing.T) {
|
|
got := deriveSource(tc.filename)
|
|
assert.Equal(t, tc.want, got)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestProcessDir_SkipsSubdirs(t *testing.T) {
|
|
brainDir := setupBrainDir(t)
|
|
|
|
// Create processed/ and failed/ subdirs with files inside.
|
|
for _, sub := range []string{"processed/2026-04-22", "failed"} {
|
|
require.NoError(t, os.MkdirAll(filepath.Join(brainDir, "raw", sub), 0o755))
|
|
}
|
|
|
|
processedFile := filepath.Join(brainDir, "raw", "processed", "2026-04-22", "old-file.md")
|
|
failedFile := filepath.Join(brainDir, "raw", "failed", "broken-file.md")
|
|
require.NoError(t, os.WriteFile(processedFile, []byte("old"), 0o644))
|
|
require.NoError(t, os.WriteFile(failedFile, []byte("broken"), 0o644))
|
|
|
|
// Also place a valid file in raw/ root that should be processed.
|
|
validFile := filepath.Join(brainDir, "raw", "valid.md")
|
|
require.NoError(t, os.WriteFile(validFile, []byte("valid content"), 0o644))
|
|
|
|
date := time.Now().UTC().Format("2006-01-02")
|
|
|
|
// Track which sources were passed to Complete.
|
|
var processedSources []string
|
|
completeFn := func(ctx context.Context, system, user string) (string, error) {
|
|
// Record that this was called; return a minimal valid page.
|
|
page := wiki.Page{
|
|
Path: "wiki/sources/valid.md",
|
|
Content: "---\ntitle: Valid\n---\n\n## Summary\n\nValid.\n",
|
|
}
|
|
b, _ := json.Marshal([]wiki.Page{page})
|
|
processedSources = append(processedSources, "called")
|
|
return string(b), nil
|
|
}
|
|
|
|
cfg := Config{
|
|
BrainDir: brainDir,
|
|
Interval: time.Hour, // not used; we call processDir directly
|
|
Pipeline: pipeline.Config{
|
|
Complete: completeFn,
|
|
ChunkSize: 0,
|
|
Schema: "# Schema\nThree page types.",
|
|
},
|
|
}
|
|
|
|
errs := processDir(context.Background(), cfg, date)
|
|
assert.Empty(t, errs, "no errors expected")
|
|
|
|
// Complete should have been called exactly once (for valid.md, not for files in subdirs).
|
|
assert.Len(t, processedSources, 1, "only the file in raw/ root should be processed")
|
|
|
|
// Files in processed/ and failed/ must remain untouched.
|
|
_, err := os.Stat(processedFile)
|
|
assert.NoError(t, err, "processed subdir file should be untouched")
|
|
_, err = os.Stat(failedFile)
|
|
assert.NoError(t, err, "failed subdir file should be untouched")
|
|
}
|