feat(pipeline): add POST /backfill-refs endpoint to retroactively inject source back-references
This commit is contained in:
91
ingestion/internal/pipeline/backfill.go
Normal file
91
ingestion/internal/pipeline/backfill.go
Normal file
@@ -0,0 +1,91 @@
|
||||
// ingestion/internal/pipeline/backfill.go
|
||||
package pipeline
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||
)
|
||||
|
||||
// BackfillRefs walks wiki/sources/ and injects source back-references into every
|
||||
// concept and entity page that each source links to.
|
||||
// Changes for all sources are accumulated in memory before writing, so multiple
|
||||
// sources referencing the same concept are merged in one pass.
|
||||
// Deduplication is handled by wiki.Merge — running this multiple times is safe.
|
||||
// Returns the number of concept/entity pages written.
|
||||
func BackfillRefs(ctx context.Context, brainDir string) (int, error) {
|
||||
inventory, err := wiki.LoadInventory(brainDir)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("load inventory: %w", err)
|
||||
}
|
||||
|
||||
sourcesDir := filepath.Join(brainDir, "wiki", "sources")
|
||||
entries, err := os.ReadDir(sourcesDir)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return 0, nil
|
||||
}
|
||||
return 0, fmt.Errorf("read sources dir: %w", err)
|
||||
}
|
||||
|
||||
// Accumulate all changes before writing: relPath → updated Page.
|
||||
// Collecting first means two sources that both link the same concept
|
||||
// get both refs merged before a single write.
|
||||
pending := make(map[string]wiki.Page)
|
||||
|
||||
for _, e := range entries {
|
||||
if ctx.Err() != nil {
|
||||
return 0, ctx.Err()
|
||||
}
|
||||
if e.IsDir() || !strings.HasSuffix(e.Name(), ".md") {
|
||||
continue
|
||||
}
|
||||
|
||||
b, err := os.ReadFile(filepath.Join(sourcesDir, e.Name()))
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
sourceContent := string(b)
|
||||
sourceSlug := strings.TrimSuffix(e.Name(), ".md")
|
||||
sourceTitle := extractTitle(sourceContent)
|
||||
if sourceTitle == "" {
|
||||
sourceTitle = sourceSlug
|
||||
}
|
||||
sourceRef := "- [[" + sourceSlug + "|" + sourceTitle + "]]"
|
||||
|
||||
for slug := range extractWikilinks(sourceContent) {
|
||||
if slug == sourceSlug {
|
||||
continue
|
||||
}
|
||||
pt, ok := findInInventory(slug, inventory)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
relPath := "wiki/" + string(pt) + "/" + slug + ".md"
|
||||
|
||||
// Start from already-accumulated version if we've seen this page.
|
||||
page, seen := pending[relPath]
|
||||
if !seen {
|
||||
raw, err := os.ReadFile(filepath.Join(brainDir, filepath.FromSlash(relPath)))
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
page = wiki.Page{Path: relPath, Content: string(raw)}
|
||||
}
|
||||
pending[relPath] = addSourceRef(page, sourceRef)
|
||||
}
|
||||
}
|
||||
|
||||
for relPath, page := range pending {
|
||||
dest := filepath.Join(brainDir, filepath.FromSlash(relPath))
|
||||
if err := os.WriteFile(dest, []byte(page.Content), 0o644); err != nil {
|
||||
return 0, fmt.Errorf("write %s: %w", relPath, err)
|
||||
}
|
||||
}
|
||||
|
||||
return len(pending), nil
|
||||
}
|
||||
Reference in New Issue
Block a user