feat: source back-references on concept and entity pages
After each ingestion, every concept and entity page linked from the source page gains a ## Sources entry pointing back to that source. Pages already on disk (from prior ingestions) are loaded and updated, so each newly ingested source adds to the references those pages accumulate over time. Deduplication is handled by wiki.Merge's existing bullet-section logic.
This commit is contained in:
433
docs/superpowers/plans/2026-04-23-source-backrefs.md
Normal file
433
docs/superpowers/plans/2026-04-23-source-backrefs.md
Normal file
@@ -0,0 +1,433 @@
|
||||
# Source Back-References Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** After the LLM produces wiki pages for an ingestion, automatically inject a `## Sources` back-reference on every concept and entity page that the source page links to.
|
||||
|
||||
**Architecture:** A new `injectSourceRefs` post-processing step is inserted between `Resolve` and `mergeAll` in `pipeline.Run`. It finds the source page in the proposed batch, extracts all `[[slug|...]]` wikilinks from it, and then, for each linked concept or entity page, calls `wiki.Merge` with a minimal patch page that adds the back-reference. `wiki.Merge` already treats `## Sources` as a bullet section with deduplication — no custom section parsing is needed. For concepts/entities that exist on disk but weren't proposed in the current batch (the common case on re-ingestion), the function loads them from disk and adds them to the pages list so they are updated.
|
||||
|
||||
**Tech Stack:** Go stdlib (`regexp`, `os`, `path/filepath`, `strings`), existing `wiki.Merge` and `wiki.Page` types.
|
||||
|
||||
---
|
||||
|
||||
## File Structure
|
||||
|
||||
**New files:**
|
||||
- `ingestion/internal/pipeline/refs.go` — `injectSourceRefs`, `addSourceRef`, `extractWikilinks`, `findSourcePage`, `findInInventory`
|
||||
- `ingestion/internal/pipeline/refs_test.go` — table-driven tests
|
||||
|
||||
**Modified files:**
|
||||
- `ingestion/internal/pipeline/pipeline.go` — insert `injectSourceRefs` call between `Resolve` and `mergeAll`
|
||||
|
||||
---
|
||||
|
||||
### Task 1: `refs.go` — source back-reference injection
|
||||
|
||||
**Files:**
|
||||
- Create: `ingestion/internal/pipeline/refs_test.go`
|
||||
- Create: `ingestion/internal/pipeline/refs.go`
|
||||
|
||||
- [ ] **Step 1: Write the failing tests**
|
||||
|
||||
```go
|
||||
// ingestion/internal/pipeline/refs_test.go
|
||||
package pipeline
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||
)
|
||||
|
||||
// makeInventory builds a minimal inventory for test use.
|
||||
func makeInventory(concepts, entities []string) map[wiki.PageType][]wiki.Entry {
|
||||
inv := map[wiki.PageType][]wiki.Entry{
|
||||
wiki.PageTypeConcept: {},
|
||||
wiki.PageTypeEntity: {},
|
||||
wiki.PageTypeSource: {},
|
||||
}
|
||||
for _, slug := range concepts {
|
||||
inv[wiki.PageTypeConcept] = append(inv[wiki.PageTypeConcept], wiki.Entry{Slug: slug, Title: slug})
|
||||
}
|
||||
for _, slug := range entities {
|
||||
inv[wiki.PageTypeEntity] = append(inv[wiki.PageTypeEntity], wiki.Entry{Slug: slug, Title: slug})
|
||||
}
|
||||
return inv
|
||||
}
|
||||
|
||||
func TestInjectSourceRefs_NoSourcePage(t *testing.T) {
|
||||
pages := []wiki.Page{
|
||||
{Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Definition\n\nFoo.\n"},
|
||||
}
|
||||
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||
assert.Equal(t, pages, got)
|
||||
}
|
||||
|
||||
func TestInjectSourceRefs_InjectsIntoProposedConcept(t *testing.T) {
|
||||
pages := []wiki.Page{
|
||||
{
|
||||
Path: "wiki/sources/my-article.md",
|
||||
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[domain-driven-design|Domain Driven Design]].\n",
|
||||
},
|
||||
{
|
||||
Path: "wiki/concepts/domain-driven-design.md",
|
||||
Content: "---\ntitle: Domain Driven Design\n---\n\n## Definition\n\nA methodology.\n",
|
||||
},
|
||||
}
|
||||
|
||||
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||
|
||||
require.Len(t, got, 2)
|
||||
assert.Contains(t, got[1].Content, "## Sources")
|
||||
assert.Contains(t, got[1].Content, "[[my-article|My Article]]")
|
||||
}
|
||||
|
||||
func TestInjectSourceRefs_LoadsConceptFromDisk(t *testing.T) {
|
||||
brainDir := t.TempDir()
|
||||
conceptDir := filepath.Join(brainDir, "wiki", "concepts")
|
||||
require.NoError(t, os.MkdirAll(conceptDir, 0o755))
|
||||
require.NoError(t, os.WriteFile(
|
||||
filepath.Join(conceptDir, "shape-up.md"),
|
||||
[]byte("---\ntitle: Shape Up\n---\n\n## Definition\n\nA methodology.\n"),
|
||||
0o644,
|
||||
))
|
||||
|
||||
pages := []wiki.Page{
|
||||
{
|
||||
Path: "wiki/sources/my-article.md",
|
||||
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[shape-up|Shape Up]].\n",
|
||||
},
|
||||
}
|
||||
inv := makeInventory([]string{"shape-up"}, nil)
|
||||
|
||||
got := injectSourceRefs(pages, inv, brainDir)
|
||||
|
||||
// Should have loaded shape-up.md from disk and added it with source ref.
|
||||
require.Len(t, got, 2)
|
||||
var conceptPage wiki.Page
|
||||
for _, p := range got {
|
||||
if p.Path == "wiki/concepts/shape-up.md" {
|
||||
conceptPage = p
|
||||
}
|
||||
}
|
||||
assert.Contains(t, conceptPage.Content, "## Sources")
|
||||
assert.Contains(t, conceptPage.Content, "[[my-article|My Article]]")
|
||||
// Original content preserved.
|
||||
assert.Contains(t, conceptPage.Content, "## Definition")
|
||||
}
|
||||
|
||||
func TestInjectSourceRefs_NoSelfReference(t *testing.T) {
|
||||
pages := []wiki.Page{
|
||||
{
|
||||
Path: "wiki/sources/my-article.md",
|
||||
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSelf-link [[my-article|My Article]].\n",
|
||||
},
|
||||
}
|
||||
|
||||
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||
|
||||
// Only one page — source should not reference itself.
|
||||
assert.Len(t, got, 1)
|
||||
}
|
||||
|
||||
func TestInjectSourceRefs_DeduplicatesOnReingestion(t *testing.T) {
|
||||
// Concept already has source ref from a prior ingestion.
|
||||
pages := []wiki.Page{
|
||||
{
|
||||
Path: "wiki/sources/my-article.md",
|
||||
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[ddd|DDD]].\n",
|
||||
},
|
||||
{
|
||||
Path: "wiki/concepts/ddd.md",
|
||||
Content: "---\ntitle: DDD\n---\n\n## Definition\n\nA thing.\n\n## Sources\n\n- [[my-article|My Article]]\n",
|
||||
},
|
||||
}
|
||||
|
||||
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||
|
||||
require.Len(t, got, 2)
|
||||
// The source ref must appear exactly once.
|
||||
count := 0
|
||||
for _, line := range splitLines(got[1].Content) {
|
||||
if line == "- [[my-article|My Article]]" {
|
||||
count++
|
||||
}
|
||||
}
|
||||
assert.Equal(t, 1, count, "source ref should appear exactly once")
|
||||
}
|
||||
|
||||
func TestInjectSourceRefs_InjectsIntoEntity(t *testing.T) {
|
||||
pages := []wiki.Page{
|
||||
{
|
||||
Path: "wiki/sources/book.md",
|
||||
Content: "---\ntitle: Book\n---\n\n## Summary\n\nBy [[ryan-singer|Ryan Singer]].\n",
|
||||
},
|
||||
{
|
||||
Path: "wiki/entities/ryan-singer.md",
|
||||
Content: "---\ntitle: Ryan Singer\n---\n\n## Description\n\nA designer.\n",
|
||||
},
|
||||
}
|
||||
|
||||
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||
|
||||
require.Len(t, got, 2)
|
||||
var entity wiki.Page
|
||||
for _, p := range got {
|
||||
if p.Path == "wiki/entities/ryan-singer.md" {
|
||||
entity = p
|
||||
}
|
||||
}
|
||||
assert.Contains(t, entity.Content, "[[book|Book]]")
|
||||
}
|
||||
|
||||
func TestExtractWikilinks(t *testing.T) {
|
||||
content := "See [[foo|Foo]] and [[bar|Bar]] and [[foo|Foo again]]."
|
||||
got := extractWikilinks(content)
|
||||
assert.True(t, got["foo"])
|
||||
assert.True(t, got["bar"])
|
||||
assert.Len(t, got, 2, "duplicate slugs should be deduplicated")
|
||||
}
|
||||
|
||||
// splitLines is a test helper that returns the non-empty lines of s, in order.
// Lines are separated by '\n' only; a "\r" before the newline stays part of
// the line. The result is nil when s contains no non-empty line.
func splitLines(s string) []string {
	var kept []string
	for _, line := range splitNewlines(s) {
		if line == "" {
			continue
		}
		kept = append(kept, line)
	}
	return kept
}

// splitNewlines cuts s at every '\n' and returns all pieces, including empty
// ones. The result always has at least one element: the text after the last
// newline (or all of s when it contains none).
func splitNewlines(s string) []string {
	var parts []string
	rest := s
	for {
		cut := -1
		for i := 0; i < len(rest); i++ {
			if rest[i] == '\n' {
				cut = i
				break
			}
		}
		if cut < 0 {
			// No newline left: whatever remains is the final piece.
			return append(parts, rest)
		}
		parts = append(parts, rest[:cut])
		rest = rest[cut+1:]
	}
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run to verify they fail**
|
||||
|
||||
```bash
|
||||
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs/ingestion && go test ./internal/pipeline/... -run "TestInjectSourceRefs|TestExtractWikilinks" -v
|
||||
```
|
||||
Expected: compile error — `injectSourceRefs` and `extractWikilinks` not defined.
|
||||
|
||||
- [ ] **Step 3: Implement refs.go**
|
||||
|
||||
```go
|
||||
// ingestion/internal/pipeline/refs.go
|
||||
package pipeline
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||
)
|
||||
|
||||
var wikilinkRE = regexp.MustCompile(`\[\[([^|\]]+)\|`)
|
||||
|
||||
// injectSourceRefs finds the source page in the proposed batch, extracts its wikilinks,
|
||||
// and injects a back-reference into every linked concept or entity page.
|
||||
// Pages that exist on disk but are not in the current batch are loaded and appended
|
||||
// so they will be updated on write.
|
||||
func injectSourceRefs(pages []wiki.Page, inventory map[wiki.PageType][]wiki.Entry, brainDir string) []wiki.Page {
|
||||
sourceSlug, sourceTitle, found := findSourcePage(pages)
|
||||
if !found {
|
||||
return pages
|
||||
}
|
||||
|
||||
// Locate source page content for wikilink extraction.
|
||||
var sourceContent string
|
||||
for _, p := range pages {
|
||||
if strings.HasPrefix(p.Path, "wiki/sources/") &&
|
||||
strings.TrimSuffix(filepath.Base(p.Path), ".md") == sourceSlug {
|
||||
sourceContent = p.Content
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
linkedSlugs := extractWikilinks(sourceContent)
|
||||
sourceRef := "- [[" + sourceSlug + "|" + sourceTitle + "]]"
|
||||
|
||||
// Build slug → index map for proposed pages (excluding wiki/sources/).
|
||||
bySlug := make(map[string]int, len(pages))
|
||||
for i, p := range pages {
|
||||
if !strings.HasPrefix(p.Path, "wiki/sources/") {
|
||||
bySlug[strings.TrimSuffix(filepath.Base(p.Path), ".md")] = i
|
||||
}
|
||||
}
|
||||
|
||||
for slug := range linkedSlugs {
|
||||
if slug == sourceSlug {
|
||||
continue // no self-reference
|
||||
}
|
||||
|
||||
if idx, ok := bySlug[slug]; ok {
|
||||
// Concept/entity is in the proposed batch — inject inline.
|
||||
pages[idx] = addSourceRef(pages[idx], sourceRef)
|
||||
continue
|
||||
}
|
||||
|
||||
// Not in proposed batch — look for it in the inventory (exists on disk).
|
||||
pt, ok := findInInventory(slug, inventory)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
diskPath := filepath.Join(brainDir, "wiki", string(pt), slug+".md")
|
||||
b, err := os.ReadFile(diskPath)
|
||||
if err != nil {
|
||||
continue // page not found on disk; skip
|
||||
}
|
||||
page := wiki.Page{
|
||||
Path: "wiki/" + string(pt) + "/" + slug + ".md",
|
||||
Content: string(b),
|
||||
}
|
||||
pages = append(pages, addSourceRef(page, sourceRef))
|
||||
}
|
||||
|
||||
return pages
|
||||
}
|
||||
|
||||
// addSourceRef injects sourceRef into the ## Sources bullet section of page.
|
||||
// Uses wiki.Merge so that existing Sources entries are deduplicated and all
|
||||
// other sections are preserved unchanged.
|
||||
func addSourceRef(page wiki.Page, sourceRef string) wiki.Page {
|
||||
patch := wiki.Page{
|
||||
Path: page.Path,
|
||||
Content: "\n## Sources\n\n" + sourceRef + "\n",
|
||||
}
|
||||
return wiki.Merge(page, patch)
|
||||
}
|
||||
|
||||
// extractWikilinks returns the set of slugs referenced as [[slug|...]] in content.
|
||||
func extractWikilinks(content string) map[string]bool {
|
||||
slugs := make(map[string]bool)
|
||||
for _, m := range wikilinkRE.FindAllStringSubmatch(content, -1) {
|
||||
slugs[m[1]] = true
|
||||
}
|
||||
return slugs
|
||||
}
|
||||
|
||||
// findSourcePage returns the slug and title of the first wiki/sources/ page in pages.
|
||||
func findSourcePage(pages []wiki.Page) (slug, title string, found bool) {
|
||||
for _, p := range pages {
|
||||
if strings.HasPrefix(p.Path, "wiki/sources/") {
|
||||
slug = strings.TrimSuffix(filepath.Base(p.Path), ".md")
|
||||
title = extractTitle(p.Content)
|
||||
if title == "" {
|
||||
title = slug
|
||||
}
|
||||
return slug, title, true
|
||||
}
|
||||
}
|
||||
return "", "", false
|
||||
}
|
||||
|
||||
// findInInventory returns the PageType for a slug if it appears in the inventory.
|
||||
func findInInventory(slug string, inventory map[wiki.PageType][]wiki.Entry) (wiki.PageType, bool) {
|
||||
for pt, entries := range inventory {
|
||||
for _, e := range entries {
|
||||
if e.Slug == slug {
|
||||
return pt, true
|
||||
}
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Run all pipeline tests**
|
||||
|
||||
```bash
|
||||
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs/ingestion && go test ./internal/pipeline/... -v
|
||||
```
|
||||
Expected: all existing tests PASS + 7 new refs tests PASS.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
```bash
|
||||
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs && git add ingestion/internal/pipeline/refs.go ingestion/internal/pipeline/refs_test.go && git commit -m "feat(pipeline): inject source back-references into concept and entity pages"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 2: Wire injectSourceRefs into pipeline.Run
|
||||
|
||||
**Files:**
|
||||
- Modify: `ingestion/internal/pipeline/pipeline.go`
|
||||
|
||||
- [ ] **Step 1: Insert the call**
|
||||
|
||||
In `pipeline.go`, locate:
|
||||
|
||||
```go
|
||||
resolved := Resolve(allPages, inventory)
|
||||
merged := mergeAll(resolved)
|
||||
```
|
||||
|
||||
Replace with:
|
||||
|
||||
```go
|
||||
resolved := Resolve(allPages, inventory)
|
||||
withRefs := injectSourceRefs(resolved, inventory, brainDir)
|
||||
merged := mergeAll(withRefs)
|
||||
```
|
||||
|
||||
No import changes needed — same package.
|
||||
|
||||
- [ ] **Step 2: Run all pipeline tests**
|
||||
|
||||
```bash
|
||||
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs/ingestion && go test ./internal/pipeline/... -v
|
||||
```
|
||||
Expected: all tests PASS. The existing `TestRun_WritesPages` and `TestRun_DryRunDoesNotWrite` use LLM mocks that return source pages with no wikilinks to concepts — `injectSourceRefs` is a no-op for them.
|
||||
|
||||
- [ ] **Step 3: Run full test suite + lint**
|
||||
|
||||
```bash
|
||||
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs/ingestion && go test ./... && golangci-lint run ./...
|
||||
```
|
||||
Expected: all packages PASS, 0 lint issues.
|
||||
|
||||
- [ ] **Step 4: Commit**
|
||||
|
||||
```bash
|
||||
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs && git add ingestion/internal/pipeline/pipeline.go && git commit -m "feat(pipeline): wire source back-reference injection into Run"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Self-Review
|
||||
|
||||
**Spec coverage:**
|
||||
|
||||
| Requirement | Task |
|
||||
|---|---|
|
||||
| Concepts get `## Sources` back-link to ingested source | Task 1 |
|
||||
| Entities get `## Sources` back-link | Task 1 (TestInjectSourceRefs_InjectsIntoEntity) |
|
||||
| Existing pages on disk get updated with new source | Task 1 (TestInjectSourceRefs_LoadsConceptFromDisk) |
|
||||
| Re-ingestion of same source does not duplicate the ref | Task 1 (TestInjectSourceRefs_DeduplicatesOnReingestion) |
|
||||
| Source page does not reference itself | Task 1 (TestInjectSourceRefs_NoSelfReference) |
|
||||
| No-op when batch has no source page | Task 1 (TestInjectSourceRefs_NoSourcePage) |
|
||||
| Wired into Run between Resolve and mergeAll | Task 2 |
|
||||
| Full test suite and lint pass | Task 2 Step 3 |
|
||||
|
||||
**Placeholder scan:** None.
|
||||
|
||||
**Type consistency:** `injectSourceRefs([]wiki.Page, map[wiki.PageType][]wiki.Entry, string) []wiki.Page` — used identically in refs.go (definition) and pipeline.go (call site).
|
||||
@@ -58,7 +58,8 @@ func Run(ctx context.Context, cfg Config, brainDir, content, source string, dryR
|
||||
}
|
||||
|
||||
resolved := Resolve(allPages, inventory)
|
||||
merged := mergeAll(resolved)
|
||||
withRefs := injectSourceRefs(resolved, inventory, brainDir)
|
||||
merged := mergeAll(withRefs)
|
||||
|
||||
date := time.Now().UTC().Format("2006-01-02")
|
||||
var written []string
|
||||
|
||||
115
ingestion/internal/pipeline/refs.go
Normal file
115
ingestion/internal/pipeline/refs.go
Normal file
@@ -0,0 +1,115 @@
|
||||
// ingestion/internal/pipeline/refs.go
|
||||
package pipeline
|
||||
|
||||
import (
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
)
|
||||
|
||||
// wikilinkRE matches the opening of a piped wikilink, "[[slug|", and captures
// the slug (one or more characters other than '|' and ']') in group 1.
// Links written without a pipe, such as "[[slug]]", do not match.
var wikilinkRE = regexp.MustCompile(
	`\[\[([^|\]]+)\|`,
)
|
||||
|
||||
// injectSourceRefs finds the source page in the proposed batch, extracts its
|
||||
// wikilinks, and injects a back-reference into every linked concept or entity page.
|
||||
// Pages that exist on disk but are not in the current batch are loaded and
|
||||
// appended so they will be updated on write.
|
||||
func injectSourceRefs(pages []wiki.Page, inventory map[wiki.PageType][]wiki.Entry, brainDir string) []wiki.Page {
|
||||
sourceSlug, sourceTitle, found := findSourcePage(pages)
|
||||
if !found {
|
||||
return pages
|
||||
}
|
||||
|
||||
var sourceContent string
|
||||
for _, p := range pages {
|
||||
if strings.HasPrefix(p.Path, "wiki/sources/") &&
|
||||
strings.TrimSuffix(filepath.Base(p.Path), ".md") == sourceSlug {
|
||||
sourceContent = p.Content
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
linkedSlugs := extractWikilinks(sourceContent)
|
||||
sourceRef := "- [[" + sourceSlug + "|" + sourceTitle + "]]"
|
||||
|
||||
bySlug := make(map[string]int, len(pages))
|
||||
for i, p := range pages {
|
||||
if !strings.HasPrefix(p.Path, "wiki/sources/") {
|
||||
bySlug[strings.TrimSuffix(filepath.Base(p.Path), ".md")] = i
|
||||
}
|
||||
}
|
||||
|
||||
for slug := range linkedSlugs {
|
||||
if slug == sourceSlug {
|
||||
continue
|
||||
}
|
||||
if idx, ok := bySlug[slug]; ok {
|
||||
pages[idx] = addSourceRef(pages[idx], sourceRef)
|
||||
continue
|
||||
}
|
||||
pt, ok := findInInventory(slug, inventory)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
diskPath := filepath.Join(brainDir, "wiki", string(pt), slug+".md")
|
||||
b, err := os.ReadFile(diskPath)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
page := wiki.Page{
|
||||
Path: "wiki/" + string(pt) + "/" + slug + ".md",
|
||||
Content: string(b),
|
||||
}
|
||||
pages = append(pages, addSourceRef(page, sourceRef))
|
||||
}
|
||||
|
||||
return pages
|
||||
}
|
||||
|
||||
// addSourceRef injects sourceRef into the ## Sources bullet section of page
|
||||
// using wiki.Merge, which deduplicates bullets automatically.
|
||||
func addSourceRef(page wiki.Page, sourceRef string) wiki.Page {
|
||||
patch := wiki.Page{
|
||||
Path: page.Path,
|
||||
Content: "\n## Sources\n\n" + sourceRef + "\n",
|
||||
}
|
||||
return wiki.Merge(page, patch)
|
||||
}
|
||||
|
||||
// extractWikilinks returns the set of slugs referenced as [[slug|...]] in content.
|
||||
func extractWikilinks(content string) map[string]bool {
|
||||
slugs := make(map[string]bool)
|
||||
for _, m := range wikilinkRE.FindAllStringSubmatch(content, -1) {
|
||||
slugs[m[1]] = true
|
||||
}
|
||||
return slugs
|
||||
}
|
||||
|
||||
// findSourcePage returns the slug and title of the first wiki/sources/ page in pages.
|
||||
func findSourcePage(pages []wiki.Page) (slug, title string, found bool) {
|
||||
for _, p := range pages {
|
||||
if strings.HasPrefix(p.Path, "wiki/sources/") {
|
||||
slug = strings.TrimSuffix(filepath.Base(p.Path), ".md")
|
||||
title = extractTitle(p.Content)
|
||||
if title == "" {
|
||||
title = slug
|
||||
}
|
||||
return slug, title, true
|
||||
}
|
||||
}
|
||||
return "", "", false
|
||||
}
|
||||
|
||||
// findInInventory returns the PageType for a slug if it appears in the inventory.
|
||||
func findInInventory(slug string, inventory map[wiki.PageType][]wiki.Entry) (wiki.PageType, bool) {
|
||||
for pt, entries := range inventory {
|
||||
for _, e := range entries {
|
||||
if e.Slug == slug {
|
||||
return pt, true
|
||||
}
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
172
ingestion/internal/pipeline/refs_test.go
Normal file
172
ingestion/internal/pipeline/refs_test.go
Normal file
@@ -0,0 +1,172 @@
|
||||
// ingestion/internal/pipeline/refs_test.go
|
||||
package pipeline
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||
)
|
||||
|
||||
func makeInventory(concepts, entities []string) map[wiki.PageType][]wiki.Entry {
|
||||
inv := map[wiki.PageType][]wiki.Entry{
|
||||
wiki.PageTypeConcept: {},
|
||||
wiki.PageTypeEntity: {},
|
||||
wiki.PageTypeSource: {},
|
||||
}
|
||||
for _, slug := range concepts {
|
||||
inv[wiki.PageTypeConcept] = append(inv[wiki.PageTypeConcept], wiki.Entry{Slug: slug, Title: slug})
|
||||
}
|
||||
for _, slug := range entities {
|
||||
inv[wiki.PageTypeEntity] = append(inv[wiki.PageTypeEntity], wiki.Entry{Slug: slug, Title: slug})
|
||||
}
|
||||
return inv
|
||||
}
|
||||
|
||||
func TestInjectSourceRefs_NoSourcePage(t *testing.T) {
|
||||
pages := []wiki.Page{
|
||||
{Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Definition\n\nFoo.\n"},
|
||||
}
|
||||
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||
assert.Equal(t, pages, got)
|
||||
}
|
||||
|
||||
func TestInjectSourceRefs_InjectsIntoProposedConcept(t *testing.T) {
|
||||
pages := []wiki.Page{
|
||||
{
|
||||
Path: "wiki/sources/my-article.md",
|
||||
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[domain-driven-design|Domain Driven Design]].\n",
|
||||
},
|
||||
{
|
||||
Path: "wiki/concepts/domain-driven-design.md",
|
||||
Content: "---\ntitle: Domain Driven Design\n---\n\n## Definition\n\nA methodology.\n",
|
||||
},
|
||||
}
|
||||
|
||||
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||
|
||||
require.Len(t, got, 2)
|
||||
assert.Contains(t, got[1].Content, "## Sources")
|
||||
assert.Contains(t, got[1].Content, "[[my-article|My Article]]")
|
||||
}
|
||||
|
||||
func TestInjectSourceRefs_LoadsConceptFromDisk(t *testing.T) {
|
||||
brainDir := t.TempDir()
|
||||
conceptDir := filepath.Join(brainDir, "wiki", "concepts")
|
||||
require.NoError(t, os.MkdirAll(conceptDir, 0o755))
|
||||
require.NoError(t, os.WriteFile(
|
||||
filepath.Join(conceptDir, "shape-up.md"),
|
||||
[]byte("---\ntitle: Shape Up\n---\n\n## Definition\n\nA methodology.\n"),
|
||||
0o644,
|
||||
))
|
||||
|
||||
pages := []wiki.Page{
|
||||
{
|
||||
Path: "wiki/sources/my-article.md",
|
||||
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[shape-up|Shape Up]].\n",
|
||||
},
|
||||
}
|
||||
inv := makeInventory([]string{"shape-up"}, nil)
|
||||
|
||||
got := injectSourceRefs(pages, inv, brainDir)
|
||||
|
||||
require.Len(t, got, 2)
|
||||
var conceptPage wiki.Page
|
||||
for _, p := range got {
|
||||
if p.Path == "wiki/concepts/shape-up.md" {
|
||||
conceptPage = p
|
||||
}
|
||||
}
|
||||
assert.Contains(t, conceptPage.Content, "## Sources")
|
||||
assert.Contains(t, conceptPage.Content, "[[my-article|My Article]]")
|
||||
assert.Contains(t, conceptPage.Content, "## Definition")
|
||||
}
|
||||
|
||||
func TestInjectSourceRefs_NoSelfReference(t *testing.T) {
|
||||
pages := []wiki.Page{
|
||||
{
|
||||
Path: "wiki/sources/my-article.md",
|
||||
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSelf-link [[my-article|My Article]].\n",
|
||||
},
|
||||
}
|
||||
|
||||
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||
assert.Len(t, got, 1)
|
||||
}
|
||||
|
||||
func TestInjectSourceRefs_DeduplicatesOnReingestion(t *testing.T) {
|
||||
pages := []wiki.Page{
|
||||
{
|
||||
Path: "wiki/sources/my-article.md",
|
||||
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[ddd|DDD]].\n",
|
||||
},
|
||||
{
|
||||
Path: "wiki/concepts/ddd.md",
|
||||
Content: "---\ntitle: DDD\n---\n\n## Definition\n\nA thing.\n\n## Sources\n\n- [[my-article|My Article]]\n",
|
||||
},
|
||||
}
|
||||
|
||||
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||
|
||||
require.Len(t, got, 2)
|
||||
count := 0
|
||||
for _, line := range splitLines(got[1].Content) {
|
||||
if line == "- [[my-article|My Article]]" {
|
||||
count++
|
||||
}
|
||||
}
|
||||
assert.Equal(t, 1, count, "source ref should appear exactly once")
|
||||
}
|
||||
|
||||
func TestInjectSourceRefs_InjectsIntoEntity(t *testing.T) {
|
||||
pages := []wiki.Page{
|
||||
{
|
||||
Path: "wiki/sources/book.md",
|
||||
Content: "---\ntitle: Book\n---\n\n## Summary\n\nBy [[ryan-singer|Ryan Singer]].\n",
|
||||
},
|
||||
{
|
||||
Path: "wiki/entities/ryan-singer.md",
|
||||
Content: "---\ntitle: Ryan Singer\n---\n\n## Description\n\nA designer.\n",
|
||||
},
|
||||
}
|
||||
|
||||
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||
|
||||
require.Len(t, got, 2)
|
||||
var entity wiki.Page
|
||||
for _, p := range got {
|
||||
if p.Path == "wiki/entities/ryan-singer.md" {
|
||||
entity = p
|
||||
}
|
||||
}
|
||||
assert.Contains(t, entity.Content, "[[book|Book]]")
|
||||
}
|
||||
|
||||
func TestExtractWikilinks(t *testing.T) {
|
||||
content := "See [[foo|Foo]] and [[bar|Bar]] and [[foo|Foo again]]."
|
||||
got := extractWikilinks(content)
|
||||
assert.True(t, got["foo"])
|
||||
assert.True(t, got["bar"])
|
||||
assert.Len(t, got, 2, "duplicate slugs should be deduplicated")
|
||||
}
|
||||
|
||||
// splitLines is a test helper that breaks s on '\n' and returns the non-empty
// pieces in order. Only a bare '\n' separates lines, so a preceding "\r" stays
// part of the line. The result is nil when s has no non-empty line.
func splitLines(s string) []string {
	var lines []string
	emit := func(segment string) {
		if segment == "" {
			return
		}
		lines = append(lines, segment)
	}

	begin := 0
	for pos, ch := range []byte(s) {
		if ch != '\n' {
			continue
		}
		emit(s[begin:pos])
		begin = pos + 1
	}
	emit(s[begin:]) // text after the last newline, if any
	return lines
}
|
||||
Reference in New Issue
Block a user