From 0a075088b25b0797366b083435da6793ebcb04e6 Mon Sep 17 00:00:00 2001 From: Mathias Bergqvist Date: Thu, 23 Apr 2026 16:33:41 +0200 Subject: [PATCH] docs: add source back-references implementation plan --- .../plans/2026-04-23-source-backrefs.md | 433 ++++++++++++++++++ 1 file changed, 433 insertions(+) create mode 100644 docs/superpowers/plans/2026-04-23-source-backrefs.md diff --git a/docs/superpowers/plans/2026-04-23-source-backrefs.md b/docs/superpowers/plans/2026-04-23-source-backrefs.md new file mode 100644 index 0000000..8d05e47 --- /dev/null +++ b/docs/superpowers/plans/2026-04-23-source-backrefs.md @@ -0,0 +1,433 @@ +# Source Back-References Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** After the LLM produces wiki pages for an ingestion, automatically inject a `## Sources` back-reference on every concept and entity page that the source page links to. + +**Architecture:** A new `injectSourceRefs` post-processing step is inserted between `Resolve` and `mergeAll` in `pipeline.Run`. It finds the source page in the proposed batch, extracts all `[[slug|...]]` wikilinks, then calls `wiki.Merge` with a minimal patch page to add the back-reference. `wiki.Merge` already treats `## Sources` as a bullet section with deduplication — no custom section parsing is needed. For concepts/entities that exist on disk but weren't proposed in the current batch (the common case on re-ingestion), the function loads them from disk and adds them to the pages list so they are updated. + +**Tech Stack:** Go stdlib (`regexp`, `os`, `path/filepath`, `strings`), existing `wiki.Merge` and `wiki.Page` types. 
+ +--- + +## File Structure + +**New files:** +- `ingestion/internal/pipeline/refs.go` — `injectSourceRefs`, `addSourceRef`, `extractWikilinks`, `findSourcePage`, `findInInventory` +- `ingestion/internal/pipeline/refs_test.go` — table-driven tests + +**Modified files:** +- `ingestion/internal/pipeline/pipeline.go` — insert `injectSourceRefs` call between `Resolve` and `mergeAll` + +--- + +### Task 1: `refs.go` — source back-reference injection + +**Files:** +- Create: `ingestion/internal/pipeline/refs_test.go` +- Create: `ingestion/internal/pipeline/refs.go` + +- [ ] **Step 1: Write the failing tests** + +```go +// ingestion/internal/pipeline/refs_test.go +package pipeline + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/mathiasbq/hyperguild/ingestion/internal/wiki" +) + +// makeInventory builds a minimal inventory for test use. +func makeInventory(concepts, entities []string) map[wiki.PageType][]wiki.Entry { + inv := map[wiki.PageType][]wiki.Entry{ + wiki.PageTypeConcept: {}, + wiki.PageTypeEntity: {}, + wiki.PageTypeSource: {}, + } + for _, slug := range concepts { + inv[wiki.PageTypeConcept] = append(inv[wiki.PageTypeConcept], wiki.Entry{Slug: slug, Title: slug}) + } + for _, slug := range entities { + inv[wiki.PageTypeEntity] = append(inv[wiki.PageTypeEntity], wiki.Entry{Slug: slug, Title: slug}) + } + return inv +} + +func TestInjectSourceRefs_NoSourcePage(t *testing.T) { + pages := []wiki.Page{ + {Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Definition\n\nFoo.\n"}, + } + got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir()) + assert.Equal(t, pages, got) +} + +func TestInjectSourceRefs_InjectsIntoProposedConcept(t *testing.T) { + pages := []wiki.Page{ + { + Path: "wiki/sources/my-article.md", + Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[domain-driven-design|Domain Driven Design]].\n", + }, + { + Path: 
"wiki/concepts/domain-driven-design.md", + Content: "---\ntitle: Domain Driven Design\n---\n\n## Definition\n\nA methodology.\n", + }, + } + + got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir()) + + require.Len(t, got, 2) + assert.Contains(t, got[1].Content, "## Sources") + assert.Contains(t, got[1].Content, "[[my-article|My Article]]") +} + +func TestInjectSourceRefs_LoadsConceptFromDisk(t *testing.T) { + brainDir := t.TempDir() + conceptDir := filepath.Join(brainDir, "wiki", "concepts") + require.NoError(t, os.MkdirAll(conceptDir, 0o755)) + require.NoError(t, os.WriteFile( + filepath.Join(conceptDir, "shape-up.md"), + []byte("---\ntitle: Shape Up\n---\n\n## Definition\n\nA methodology.\n"), + 0o644, + )) + + pages := []wiki.Page{ + { + Path: "wiki/sources/my-article.md", + Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[shape-up|Shape Up]].\n", + }, + } + inv := makeInventory([]string{"shape-up"}, nil) + + got := injectSourceRefs(pages, inv, brainDir) + + // Should have loaded shape-up.md from disk and added it with source ref. + require.Len(t, got, 2) + var conceptPage wiki.Page + for _, p := range got { + if p.Path == "wiki/concepts/shape-up.md" { + conceptPage = p + } + } + assert.Contains(t, conceptPage.Content, "## Sources") + assert.Contains(t, conceptPage.Content, "[[my-article|My Article]]") + // Original content preserved. + assert.Contains(t, conceptPage.Content, "## Definition") +} + +func TestInjectSourceRefs_NoSelfReference(t *testing.T) { + pages := []wiki.Page{ + { + Path: "wiki/sources/my-article.md", + Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSelf-link [[my-article|My Article]].\n", + }, + } + + got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir()) + + // Only one page — source should not reference itself. + assert.Len(t, got, 1) +} + +func TestInjectSourceRefs_DeduplicatesOnReingestion(t *testing.T) { + // Concept already has source ref from a prior ingestion. 
+ pages := []wiki.Page{ + { + Path: "wiki/sources/my-article.md", + Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[ddd|DDD]].\n", + }, + { + Path: "wiki/concepts/ddd.md", + Content: "---\ntitle: DDD\n---\n\n## Definition\n\nA thing.\n\n## Sources\n\n- [[my-article|My Article]]\n", + }, + } + + got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir()) + + require.Len(t, got, 2) + // The source ref must appear exactly once. + count := 0 + for _, line := range splitLines(got[1].Content) { + if line == "- [[my-article|My Article]]" { + count++ + } + } + assert.Equal(t, 1, count, "source ref should appear exactly once") +} + +func TestInjectSourceRefs_InjectsIntoEntity(t *testing.T) { + pages := []wiki.Page{ + { + Path: "wiki/sources/book.md", + Content: "---\ntitle: Book\n---\n\n## Summary\n\nBy [[ryan-singer|Ryan Singer]].\n", + }, + { + Path: "wiki/entities/ryan-singer.md", + Content: "---\ntitle: Ryan Singer\n---\n\n## Description\n\nA designer.\n", + }, + } + + got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir()) + + require.Len(t, got, 2) + var entity wiki.Page + for _, p := range got { + if p.Path == "wiki/entities/ryan-singer.md" { + entity = p + } + } + assert.Contains(t, entity.Content, "[[book|Book]]") +} + +func TestExtractWikilinks(t *testing.T) { + content := "See [[foo|Foo]] and [[bar|Bar]] and [[foo|Foo again]]." + got := extractWikilinks(content) + assert.True(t, got["foo"]) + assert.True(t, got["bar"]) + assert.Len(t, got, 2, "duplicate slugs should be deduplicated") +} + +// splitLines is a test helper. 
+func splitLines(s string) []string { + var out []string + for _, l := range splitNewlines(s) { + if l != "" { + out = append(out, l) + } + } + return out +} + +func splitNewlines(s string) []string { + var lines []string + start := 0 + for i, c := range s { + if c == '\n' { + lines = append(lines, s[start:i]) + start = i + 1 + } + } + lines = append(lines, s[start:]) + return lines +} +``` + +- [ ] **Step 2: Run to verify they fail** + +```bash +cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs/ingestion && go test ./internal/pipeline/... -run "TestInjectSourceRefs|TestExtractWikilinks" -v +``` +Expected: compile error — `injectSourceRefs` and `extractWikilinks` not defined. + +- [ ] **Step 3: Implement refs.go** + +```go +// ingestion/internal/pipeline/refs.go +package pipeline + +import ( + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/mathiasbq/hyperguild/ingestion/internal/wiki" +) + +var wikilinkRE = regexp.MustCompile(`\[\[([^|\]]+)\|`) + +// injectSourceRefs finds the source page in the proposed batch, extracts its wikilinks, +// and injects a back-reference into every linked concept or entity page. +// Pages that exist on disk but are not in the current batch are loaded and appended +// so they will be updated on write. +func injectSourceRefs(pages []wiki.Page, inventory map[wiki.PageType][]wiki.Entry, brainDir string) []wiki.Page { + sourceSlug, sourceTitle, found := findSourcePage(pages) + if !found { + return pages + } + + // Locate source page content for wikilink extraction. + var sourceContent string + for _, p := range pages { + if strings.HasPrefix(p.Path, "wiki/sources/") && + strings.TrimSuffix(filepath.Base(p.Path), ".md") == sourceSlug { + sourceContent = p.Content + break + } + } + + linkedSlugs := extractWikilinks(sourceContent) + sourceRef := "- [[" + sourceSlug + "|" + sourceTitle + "]]" + + // Build slug → index map for proposed pages (excluding wiki/sources/). 
+ bySlug := make(map[string]int, len(pages)) + for i, p := range pages { + if !strings.HasPrefix(p.Path, "wiki/sources/") { + bySlug[strings.TrimSuffix(filepath.Base(p.Path), ".md")] = i + } + } + + for slug := range linkedSlugs { + if slug == sourceSlug { + continue // no self-reference + } + + if idx, ok := bySlug[slug]; ok { + // Concept/entity is in the proposed batch — inject inline. + pages[idx] = addSourceRef(pages[idx], sourceRef) + continue + } + + // Not in proposed batch — look for it in the inventory (exists on disk). + pt, ok := findInInventory(slug, inventory) + if !ok { + continue + } + diskPath := filepath.Join(brainDir, "wiki", string(pt), slug+".md") + b, err := os.ReadFile(diskPath) + if err != nil { + continue // page not found on disk; skip + } + page := wiki.Page{ + Path: "wiki/" + string(pt) + "/" + slug + ".md", + Content: string(b), + } + pages = append(pages, addSourceRef(page, sourceRef)) + } + + return pages +} + +// addSourceRef injects sourceRef into the ## Sources bullet section of page. +// Uses wiki.Merge so that existing Sources entries are deduplicated and all +// other sections are preserved unchanged. +func addSourceRef(page wiki.Page, sourceRef string) wiki.Page { + patch := wiki.Page{ + Path: page.Path, + Content: "\n## Sources\n\n" + sourceRef + "\n", + } + return wiki.Merge(page, patch) +} + +// extractWikilinks returns the set of slugs referenced as [[slug|...]] in content. +func extractWikilinks(content string) map[string]bool { + slugs := make(map[string]bool) + for _, m := range wikilinkRE.FindAllStringSubmatch(content, -1) { + slugs[m[1]] = true + } + return slugs +} + +// findSourcePage returns the slug and title of the first wiki/sources/ page in pages. 
+func findSourcePage(pages []wiki.Page) (slug, title string, found bool) { + for _, p := range pages { + if strings.HasPrefix(p.Path, "wiki/sources/") { + slug = strings.TrimSuffix(filepath.Base(p.Path), ".md") + title = extractTitle(p.Content) + if title == "" { + title = slug + } + return slug, title, true + } + } + return "", "", false +} + +// findInInventory reports whether slug is a known concept or entity (checked in that fixed order) and returns its PageType; source pages are never matched, so a source cannot receive a back-reference. +func findInInventory(slug string, inventory map[wiki.PageType][]wiki.Entry) (wiki.PageType, bool) { + for _, pt := range []wiki.PageType{wiki.PageTypeConcept, wiki.PageTypeEntity} { + for _, e := range inventory[pt] { + if e.Slug == slug { + return pt, true + } + } + } + return "", false +} +``` + +- [ ] **Step 4: Run all pipeline tests** + +```bash +cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs/ingestion && go test ./internal/pipeline/... -v +``` +Expected: all existing tests PASS + 7 new refs tests PASS. + +- [ ] **Step 5: Commit** + +```bash +cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs && git add ingestion/internal/pipeline/refs.go ingestion/internal/pipeline/refs_test.go && git commit -m "feat(pipeline): inject source back-references into concept and entity pages" +``` + +--- + +### Task 2: Wire injectSourceRefs into pipeline.Run + +**Files:** +- Modify: `ingestion/internal/pipeline/pipeline.go` + +- [ ] **Step 1: Insert the call** + +In `pipeline.go`, locate: + +```go + resolved := Resolve(allPages, inventory) + merged := mergeAll(resolved) +``` + +Replace with: + +```go + resolved := Resolve(allPages, inventory) + withRefs := injectSourceRefs(resolved, inventory, brainDir) + merged := mergeAll(withRefs) +``` + +No import changes needed — same package. + +- [ ] **Step 2: Run all pipeline tests** + +```bash +cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs/ingestion && go test ./internal/pipeline/... -v +``` +Expected: all tests PASS. 
The existing `TestRun_WritesPages` and `TestRun_DryRunDoesNotWrite` use LLM mocks that return source pages with no wikilinks to concepts — `injectSourceRefs` is a no-op for them. + +- [ ] **Step 3: Run full test suite + lint** + +```bash +cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs/ingestion && go test ./... && golangci-lint run ./... +``` +Expected: all packages PASS, 0 lint issues. + +- [ ] **Step 4: Commit** + +```bash +cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs && git add ingestion/internal/pipeline/pipeline.go && git commit -m "feat(pipeline): wire source back-reference injection into Run" +``` + +--- + +## Self-Review + +**Spec coverage:** + +| Requirement | Task | +|---|---| +| Concepts get `## Sources` back-link to ingested source | Task 1 | +| Entities get `## Sources` back-link | Task 1 (TestInjectSourceRefs_InjectsIntoEntity) | +| Existing pages on disk get updated with new source | Task 1 (TestInjectSourceRefs_LoadsConceptFromDisk) | +| Re-ingestion of same source does not duplicate the ref | Task 1 (TestInjectSourceRefs_DeduplicatesOnReingestion) | +| Source page does not reference itself | Task 1 (TestInjectSourceRefs_NoSelfReference) | +| No-op when batch has no source page | Task 1 (TestInjectSourceRefs_NoSourcePage) | +| Wired into Run between Resolve and mergeAll | Task 2 | +| Full test suite and lint pass | Task 2 Step 3 | + +**Placeholder scan:** None. + +**Type consistency:** `injectSourceRefs([]wiki.Page, map[wiki.PageType][]wiki.Entry, string) []wiki.Page` — used identically in refs.go (definition) and pipeline.go (call site).