Files
hyperguild/ingestion/internal/pipeline/backfill.go

92 lines
2.5 KiB
Go

// ingestion/internal/pipeline/backfill.go
package pipeline
import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
)
// BackfillRefs scans the Markdown files directly under <brainDir>/wiki/sources
// and, for each wikilink in a source that resolves to a page in the inventory,
// adds a "- [[slug|title]]" back-reference for that source to the linked page.
//
// Updated pages are staged in memory, keyed by their slash-separated relative
// path, and flushed to disk only after every source has been scanned. A page
// linked from several sources therefore collects all of its references and is
// written once.
//
// Sources and linked pages that cannot be read are skipped without an error. A
// missing sources directory yields (0, nil). Cancellation of ctx is checked
// once per directory entry while scanning, before any page is written.
//
// The result is the number of staged pages written; it is 0 whenever an error
// is returned, even if some pages were already written. Repeated runs are
// expected to be harmless because de-duplication is delegated to wiki.Merge
// (presumably reached through addSourceRef — not visible from this function).
func BackfillRefs(ctx context.Context, brainDir string) (int, error) {
	inventory, err := wiki.LoadInventory(brainDir)
	if err != nil {
		return 0, fmt.Errorf("load inventory: %w", err)
	}

	sourcesDir := filepath.Join(brainDir, "wiki", "sources")
	entries, err := os.ReadDir(sourcesDir)
	switch {
	case err == nil:
		// Directory listed successfully; fall through to the scan.
	case os.IsNotExist(err):
		// No sources directory means there is nothing to backfill.
		return 0, nil
	default:
		return 0, fmt.Errorf("read sources dir: %w", err)
	}

	// staged maps a page's relative path to its latest in-memory revision, so
	// references from different sources pile up on the same copy.
	staged := map[string]wiki.Page{}

	for _, entry := range entries {
		if cancelErr := ctx.Err(); cancelErr != nil {
			return 0, cancelErr
		}

		fileName := entry.Name()
		isMarkdownFile := !entry.IsDir() && strings.HasSuffix(fileName, ".md")
		if !isMarkdownFile {
			continue
		}

		data, readErr := os.ReadFile(filepath.Join(sourcesDir, fileName))
		if readErr != nil {
			// Unreadable sources are skipped rather than aborting the run.
			continue
		}
		body := string(data)

		slug := strings.TrimSuffix(fileName, ".md")
		title := extractTitle(body)
		if title == "" {
			// Fall back to the slug when the source has no title.
			title = slug
		}
		backRef := fmt.Sprintf("- [[%s|%s]]", slug, title)

		for linked := range extractWikilinks(body) {
			if linked == slug {
				// A source never back-references itself.
				continue
			}
			kind, known := findInInventory(linked, inventory)
			if !known {
				continue
			}

			target := fmt.Sprintf("wiki/%s/%s.md", string(kind), linked)

			// Prefer the staged revision so earlier references are kept;
			// otherwise load the page from disk for the first time.
			page, alreadyStaged := staged[target]
			if !alreadyStaged {
				onDisk, loadErr := os.ReadFile(filepath.Join(brainDir, filepath.FromSlash(target)))
				if loadErr != nil {
					continue
				}
				page = wiki.Page{Path: target, Content: string(onDisk)}
			}
			staged[target] = addSourceRef(page, backRef)
		}
	}

	// Flush every staged page; the first write failure aborts the run.
	for target, page := range staged {
		outPath := filepath.Join(brainDir, filepath.FromSlash(target))
		if writeErr := os.WriteFile(outPath, []byte(page.Content), 0o644); writeErr != nil {
			return 0, fmt.Errorf("write %s: %w", target, writeErr)
		}
	}
	return len(staged), nil
}