92 lines
2.5 KiB
Go
92 lines
2.5 KiB
Go
// ingestion/internal/pipeline/backfill.go
|
|
package pipeline
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
|
)
|
|
|
|
// BackfillRefs walks wiki/sources/ and injects source back-references into every
|
|
// concept and entity page that each source links to.
|
|
// Changes for all sources are accumulated in memory before writing, so multiple
|
|
// sources referencing the same concept are merged in one pass.
|
|
// Deduplication is handled by wiki.Merge — running this multiple times is safe.
|
|
// Returns the number of concept/entity pages written.
|
|
func BackfillRefs(ctx context.Context, brainDir string) (int, error) {
|
|
inventory, err := wiki.LoadInventory(brainDir)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("load inventory: %w", err)
|
|
}
|
|
|
|
sourcesDir := filepath.Join(brainDir, "wiki", "sources")
|
|
entries, err := os.ReadDir(sourcesDir)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return 0, nil
|
|
}
|
|
return 0, fmt.Errorf("read sources dir: %w", err)
|
|
}
|
|
|
|
// Accumulate all changes before writing: relPath → updated Page.
|
|
// Collecting first means two sources that both link the same concept
|
|
// get both refs merged before a single write.
|
|
pending := make(map[string]wiki.Page)
|
|
|
|
for _, e := range entries {
|
|
if ctx.Err() != nil {
|
|
return 0, ctx.Err()
|
|
}
|
|
if e.IsDir() || !strings.HasSuffix(e.Name(), ".md") {
|
|
continue
|
|
}
|
|
|
|
b, err := os.ReadFile(filepath.Join(sourcesDir, e.Name()))
|
|
if err != nil {
|
|
continue
|
|
}
|
|
sourceContent := string(b)
|
|
sourceSlug := strings.TrimSuffix(e.Name(), ".md")
|
|
sourceTitle := extractTitle(sourceContent)
|
|
if sourceTitle == "" {
|
|
sourceTitle = sourceSlug
|
|
}
|
|
sourceRef := "- [[" + sourceSlug + "|" + sourceTitle + "]]"
|
|
|
|
for slug := range extractWikilinks(sourceContent) {
|
|
if slug == sourceSlug {
|
|
continue
|
|
}
|
|
pt, ok := findInInventory(slug, inventory)
|
|
if !ok {
|
|
continue
|
|
}
|
|
relPath := "wiki/" + string(pt) + "/" + slug + ".md"
|
|
|
|
// Start from already-accumulated version if we've seen this page.
|
|
page, seen := pending[relPath]
|
|
if !seen {
|
|
raw, err := os.ReadFile(filepath.Join(brainDir, filepath.FromSlash(relPath)))
|
|
if err != nil {
|
|
continue
|
|
}
|
|
page = wiki.Page{Path: relPath, Content: string(raw)}
|
|
}
|
|
pending[relPath] = addSourceRef(page, sourceRef)
|
|
}
|
|
}
|
|
|
|
for relPath, page := range pending {
|
|
dest := filepath.Join(brainDir, filepath.FromSlash(relPath))
|
|
if err := os.WriteFile(dest, []byte(page.Content), 0o644); err != nil {
|
|
return 0, fmt.Errorf("write %s: %w", relPath, err)
|
|
}
|
|
}
|
|
|
|
return len(pending), nil
|
|
}
|