116 lines
3.1 KiB
Go
116 lines
3.1 KiB
Go
// ingestion/internal/pipeline/refs.go
|
|
package pipeline
|
|
|
|
import (
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
)
|
|
|
|
// wikilinkRE matches the opening of a piped wikilink, "[[target|", and
// captures the target (one or more characters that are neither '|' nor ']')
// in group 1. Links written without a pipe, such as "[[target]]", do not match.
var wikilinkRE = regexp.MustCompile(`\[\[([^|\]]+)\|`)
|
|
|
|
// injectSourceRefs finds the source page in the proposed batch, extracts its
|
|
// wikilinks, and injects a back-reference into every linked concept or entity page.
|
|
// Pages that exist on disk but are not in the current batch are loaded and
|
|
// appended so they will be updated on write.
|
|
func injectSourceRefs(pages []wiki.Page, inventory map[wiki.PageType][]wiki.Entry, brainDir string) []wiki.Page {
|
|
sourceSlug, sourceTitle, found := findSourcePage(pages)
|
|
if !found {
|
|
return pages
|
|
}
|
|
|
|
var sourceContent string
|
|
for _, p := range pages {
|
|
if strings.HasPrefix(p.Path, "wiki/sources/") &&
|
|
strings.TrimSuffix(filepath.Base(p.Path), ".md") == sourceSlug {
|
|
sourceContent = p.Content
|
|
break
|
|
}
|
|
}
|
|
|
|
linkedSlugs := extractWikilinks(sourceContent)
|
|
sourceRef := "- [[" + sourceSlug + "|" + sourceTitle + "]]"
|
|
|
|
bySlug := make(map[string]int, len(pages))
|
|
for i, p := range pages {
|
|
if !strings.HasPrefix(p.Path, "wiki/sources/") {
|
|
bySlug[strings.TrimSuffix(filepath.Base(p.Path), ".md")] = i
|
|
}
|
|
}
|
|
|
|
for slug := range linkedSlugs {
|
|
if slug == sourceSlug {
|
|
continue
|
|
}
|
|
if idx, ok := bySlug[slug]; ok {
|
|
pages[idx] = addSourceRef(pages[idx], sourceRef)
|
|
continue
|
|
}
|
|
pt, ok := findInInventory(slug, inventory)
|
|
if !ok {
|
|
continue
|
|
}
|
|
diskPath := filepath.Join(brainDir, "wiki", string(pt), slug+".md")
|
|
b, err := os.ReadFile(diskPath)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
page := wiki.Page{
|
|
Path: "wiki/" + string(pt) + "/" + slug + ".md",
|
|
Content: string(b),
|
|
}
|
|
pages = append(pages, addSourceRef(page, sourceRef))
|
|
}
|
|
|
|
return pages
|
|
}
|
|
|
|
// addSourceRef injects sourceRef into the ## Sources bullet section of page
|
|
// using wiki.Merge, which deduplicates bullets automatically.
|
|
func addSourceRef(page wiki.Page, sourceRef string) wiki.Page {
|
|
patch := wiki.Page{
|
|
Path: page.Path,
|
|
Content: "\n## Sources\n\n" + sourceRef + "\n",
|
|
}
|
|
return wiki.Merge(page, patch)
|
|
}
|
|
|
|
// extractWikilinks returns the set of slugs referenced as [[slug|...]] in content.
|
|
func extractWikilinks(content string) map[string]bool {
|
|
slugs := make(map[string]bool)
|
|
for _, m := range wikilinkRE.FindAllStringSubmatch(content, -1) {
|
|
slugs[m[1]] = true
|
|
}
|
|
return slugs
|
|
}
|
|
|
|
// findSourcePage returns the slug and title of the first wiki/sources/ page in pages.
|
|
func findSourcePage(pages []wiki.Page) (slug, title string, found bool) {
|
|
for _, p := range pages {
|
|
if strings.HasPrefix(p.Path, "wiki/sources/") {
|
|
slug = strings.TrimSuffix(filepath.Base(p.Path), ".md")
|
|
title = extractTitle(p.Content)
|
|
if title == "" {
|
|
title = slug
|
|
}
|
|
return slug, title, true
|
|
}
|
|
}
|
|
return "", "", false
|
|
}
|
|
|
|
// findInInventory returns the PageType for a slug if it appears in the inventory.
|
|
func findInInventory(slug string, inventory map[wiki.PageType][]wiki.Entry) (wiki.PageType, bool) {
|
|
for pt, entries := range inventory {
|
|
for _, e := range entries {
|
|
if e.Slug == slug {
|
|
return pt, true
|
|
}
|
|
}
|
|
}
|
|
return "", false
|
|
}
|