diff --git a/ingestion/cmd/server/main.go b/ingestion/cmd/server/main.go
index 194f54a..a44a02d 100644
--- a/ingestion/cmd/server/main.go
+++ b/ingestion/cmd/server/main.go
@@ -68,6 +68,7 @@ func main() {
 	mux.HandleFunc("POST /write", h.Write)
 	mux.HandleFunc("POST /ingest", h.Ingest)
 	mux.HandleFunc("POST /ingest-path", h.IngestPath)
+	mux.HandleFunc("POST /backfill-refs", h.BackfillRefs)
 
 	addr := ":" + port
 	watchIntervalLog := "disabled"
diff --git a/ingestion/internal/api/handler.go b/ingestion/internal/api/handler.go
index 141ca0a..961007d 100644
--- a/ingestion/internal/api/handler.go
+++ b/ingestion/internal/api/handler.go
@@ -272,6 +272,18 @@ func (h *Handler) IngestPath(w http.ResponseWriter, r *http.Request) {
 	writeJSON(w, ingestResponse{Pages: allPages, Warnings: allWarnings})
 }
 
+// BackfillRefs handles POST /backfill-refs — injects source back-references
+// into all concept and entity pages based on existing wiki/sources/ pages.
+func (h *Handler) BackfillRefs(w http.ResponseWriter, r *http.Request) {
+	n, err := pipeline.BackfillRefs(r.Context(), h.brainDir)
+	if err != nil {
+		h.logger.Error("backfill-refs failed", "err", err)
+		writeError(w, http.StatusInternalServerError, "backfill error")
+		return
+	}
+	writeJSON(w, map[string]int{"updated": n})
+}
+
 func writeJSON(w http.ResponseWriter, v any) {
 	w.Header().Set("Content-Type", "application/json")
 	json.NewEncoder(w).Encode(v) //nolint:errcheck
diff --git a/ingestion/internal/pipeline/backfill.go b/ingestion/internal/pipeline/backfill.go
new file mode 100644
index 0000000..dc20c94
--- /dev/null
+++ b/ingestion/internal/pipeline/backfill.go
@@ -0,0 +1,105 @@
+// ingestion/internal/pipeline/backfill.go
+package pipeline
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
+)
+
+// BackfillRefs walks wiki/sources/ and injects source back-references into every
+// concept and entity page that each source links to.
+// Changes for all sources are accumulated in memory before writing, so multiple
+// sources referencing the same concept are merged in one pass.
+// Deduplication is handled by wiki.Merge — running this multiple times is safe.
+// Pages whose content is unchanged after merging are not rewritten.
+// A missing wiki/sources/ directory is not an error and yields (0, nil).
+// Returns the number of concept/entity pages written.
+func BackfillRefs(ctx context.Context, brainDir string) (int, error) {
+	inventory, err := wiki.LoadInventory(brainDir)
+	if err != nil {
+		return 0, fmt.Errorf("load inventory: %w", err)
+	}
+
+	sourcesDir := filepath.Join(brainDir, "wiki", "sources")
+	entries, err := os.ReadDir(sourcesDir)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return 0, nil
+		}
+		return 0, fmt.Errorf("read sources dir: %w", err)
+	}
+
+	// Accumulate all changes before writing: relPath → updated Page.
+	// Collecting first means two sources that both link the same concept
+	// get both refs merged before a single write.
+	pending := make(map[string]wiki.Page)
+	// On-disk content of every pending page, so unchanged pages can be skipped.
+	originals := make(map[string]string)
+
+	for _, e := range entries {
+		if err := ctx.Err(); err != nil {
+			return 0, err
+		}
+		if e.IsDir() || !strings.HasSuffix(e.Name(), ".md") {
+			continue
+		}
+
+		b, err := os.ReadFile(filepath.Join(sourcesDir, e.Name()))
+		if err != nil {
+			// Best effort: an unreadable source is skipped rather than aborting the run.
+			continue
+		}
+		sourceContent := string(b)
+		sourceSlug := strings.TrimSuffix(e.Name(), ".md")
+		sourceTitle := extractTitle(sourceContent)
+		if sourceTitle == "" {
+			sourceTitle = sourceSlug
+		}
+		sourceRef := "- [[" + sourceSlug + "|" + sourceTitle + "]]"
+
+		for slug := range extractWikilinks(sourceContent) {
+			if slug == sourceSlug {
+				continue
+			}
+			pt, ok := findInInventory(slug, inventory)
+			if !ok {
+				continue
+			}
+			relPath := "wiki/" + string(pt) + "/" + slug + ".md"
+
+			// Start from already-accumulated version if we've seen this page.
+			page, seen := pending[relPath]
+			if !seen {
+				raw, err := os.ReadFile(filepath.Join(brainDir, filepath.FromSlash(relPath)))
+				if err != nil {
+					// Found in the inventory but unreadable: skip this link.
+					continue
+				}
+				page = wiki.Page{Path: relPath, Content: string(raw)}
+				originals[relPath] = page.Content
+			}
+			pending[relPath] = addSourceRef(page, sourceRef)
+		}
+	}
+
+	written := 0
+	for relPath, page := range pending {
+		// Skip pages the merge left untouched (e.g. on a repeat run) so their
+		// files are not rewritten and they are not counted as written.
+		if page.Content == originals[relPath] {
+			continue
+		}
+		dest := filepath.Join(brainDir, filepath.FromSlash(relPath))
+		if err := os.WriteFile(dest, []byte(page.Content), 0o644); err != nil {
+			return 0, fmt.Errorf("write %s: %w", relPath, err)
+		}
+		written++
+	}
+
+	return written, nil
+}
diff --git a/ingestion/internal/pipeline/backfill_test.go b/ingestion/internal/pipeline/backfill_test.go
new file mode 100644
index 0000000..8f23f99
--- /dev/null
+++ b/ingestion/internal/pipeline/backfill_test.go
@@ -0,0 +1,107 @@
+// ingestion/internal/pipeline/backfill_test.go
+package pipeline
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func setupBrainDir(t *testing.T) string {
+	t.Helper()
+	dir := t.TempDir()
+	for _, sub := range []string{"wiki/sources", "wiki/concepts", "wiki/entities"} {
+		require.NoError(t, os.MkdirAll(filepath.Join(dir, sub), 0o755))
+	}
+	return dir
+}
+
+func writeFile(t *testing.T, path, content string) {
+	t.Helper()
+	require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755))
+	require.NoError(t, os.WriteFile(path, []byte(content), 0o644))
+}
+
+func TestBackfillRefs_UpdatesConcept(t *testing.T) {
+	dir := setupBrainDir(t)
+	writeFile(t, filepath.Join(dir, "wiki/sources/shape-up.md"),
+		"---\ntitle: Shape Up\n---\n\n## Summary\n\nSee [[betting|Betting]].\n")
+	writeFile(t, filepath.Join(dir, "wiki/concepts/betting.md"),
+		"---\ntitle: Betting\n---\n\n## Definition\n\nA resource allocation technique.\n")
+
+	n, err := BackfillRefs(context.Background(), dir)
+	require.NoError(t, err)
+	assert.Equal(t, 1, n)
+
+	got, err := os.ReadFile(filepath.Join(dir, "wiki/concepts/betting.md"))
+	require.NoError(t, err)
+	assert.Contains(t, string(got), "## Sources")
+	assert.Contains(t, string(got), "[[shape-up|Shape Up]]")
+	assert.Contains(t, string(got), "## Definition") // original content preserved
+}
+
+func TestBackfillRefs_Deduplication(t *testing.T) {
+	dir := setupBrainDir(t)
+	writeFile(t, filepath.Join(dir, "wiki/sources/shape-up.md"),
+		"---\ntitle: Shape Up\n---\n\n## Summary\n\nSee [[betting|Betting]].\n")
+	writeFile(t, filepath.Join(dir, "wiki/concepts/betting.md"),
+		"---\ntitle: Betting\n---\n\n## Definition\n\nA technique.\n")
+
+	// Run twice — should not duplicate the ref.
+	_, err := BackfillRefs(context.Background(), dir)
+	require.NoError(t, err)
+	_, err = BackfillRefs(context.Background(), dir)
+	require.NoError(t, err)
+
+	got, err := os.ReadFile(filepath.Join(dir, "wiki/concepts/betting.md"))
+	require.NoError(t, err)
+
+	count := 0
+	for _, line := range splitLines(string(got)) {
+		if line == "- [[shape-up|Shape Up]]" {
+			count++
+		}
+	}
+	assert.Equal(t, 1, count, "ref should appear exactly once after two runs")
+}
+
+func TestBackfillRefs_MultipleSources(t *testing.T) {
+	dir := setupBrainDir(t)
+	writeFile(t, filepath.Join(dir, "wiki/sources/book-a.md"),
+		"---\ntitle: Book A\n---\n\n## Summary\n\nSee [[shaping|Shaping]].\n")
+	writeFile(t, filepath.Join(dir, "wiki/sources/book-b.md"),
+		"---\ntitle: Book B\n---\n\n## Summary\n\nAlso [[shaping|Shaping]].\n")
+	writeFile(t, filepath.Join(dir, "wiki/concepts/shaping.md"),
+		"---\ntitle: Shaping\n---\n\n## Definition\n\nA design activity.\n")
+
+	n, err := BackfillRefs(context.Background(), dir)
+	require.NoError(t, err)
+	assert.Equal(t, 1, n) // one concept page written
+
+	got, err := os.ReadFile(filepath.Join(dir, "wiki/concepts/shaping.md"))
+	require.NoError(t, err)
+	assert.Contains(t, string(got), "[[book-a|Book A]]")
+	assert.Contains(t, string(got), "[[book-b|Book B]]")
+}
+
+func TestBackfillRefs_NoSourcesDir(t *testing.T) {
+	dir := t.TempDir() // no wiki/sources subdir
+	n, err := BackfillRefs(context.Background(), dir)
+	require.NoError(t, err)
+	assert.Equal(t, 0, n)
+}
+
+func TestBackfillRefs_SkipsUnknownSlugs(t *testing.T) {
+	dir := setupBrainDir(t)
+	// Source links to a slug not in inventory and not on disk.
+	writeFile(t, filepath.Join(dir, "wiki/sources/article.md"),
+		"---\ntitle: Article\n---\n\n## Summary\n\nSee [[ghost-slug|Ghost]].\n")
+
+	n, err := BackfillRefs(context.Background(), dir)
+	require.NoError(t, err)
+	assert.Equal(t, 0, n)
+}