89 lines
2.4 KiB
Go
89 lines
2.4 KiB
Go
// ingestion/internal/pipeline/resolve.go
|
|
package pipeline
|
|
|
|
import (
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
|
)
|
|
|
|
// Resolve remaps proposed pages to existing slugs when a fuzzy title match is found.
|
|
// It only matches within the same page type (entities→entities, concepts→concepts).
|
|
// Pages with no inventory match are returned unchanged.
|
|
func Resolve(proposed []wiki.Page, inventory map[wiki.PageType][]wiki.Entry) []wiki.Page {
|
|
type key struct {
|
|
pt wiki.PageType
|
|
normalized string
|
|
}
|
|
lookup := make(map[key]string) // key → canonical slug
|
|
for pt, entries := range inventory {
|
|
for _, e := range entries {
|
|
k := key{pt: pt, normalized: normalizeTitle(e.Title)}
|
|
lookup[k] = e.Slug
|
|
for _, alias := range e.Aliases {
|
|
ak := key{pt: pt, normalized: normalizeTitle(alias)}
|
|
if _, exists := lookup[ak]; !exists {
|
|
lookup[ak] = e.Slug
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
out := make([]wiki.Page, 0, len(proposed))
|
|
for _, page := range proposed {
|
|
pt := pageTypeFromPath(page.Path)
|
|
title := extractTitle(page.Content)
|
|
k := key{pt: pt, normalized: normalizeTitle(title)}
|
|
if canonicalSlug, ok := lookup[k]; ok {
|
|
dir := filepath.Dir(page.Path)
|
|
page.Path = dir + "/" + canonicalSlug + ".md"
|
|
}
|
|
out = append(out, page)
|
|
}
|
|
return out
|
|
}
|
|
|
|
// normalizeTitle reduces a title to a canonical form for comparison: it
// lowercases, drops a single leading English article ("the", "a", "an"),
// treats hyphens as spaces, and collapses runs of whitespace.
//
//	"The Shape Up Method" → "shape up method"
//	"Shape-Up"            → "shape up"
//	"The A Team"          → "a team"
//
// A title that consists only of an article (e.g. "The") is kept as is.
func normalizeTitle(s string) string {
	words := strings.Fields(strings.ToLower(s))

	// Strip at most one article, and only when something follows it, so
	// "The A Team" keeps its "a" and a bare "The" does not become empty.
	if len(words) > 1 {
		switch words[0] {
		case "the", "a", "an":
			words = words[1:]
		}
	}

	// Hyphens become spaces only after the article check, so "A-Team" is not
	// read as article + word. Re-split to collapse any spaces this introduces.
	joined := strings.ReplaceAll(strings.Join(words, " "), "-", " ")
	return strings.Join(strings.Fields(joined), " ")
}
|
|
|
|
// pageTypeFromPath extracts the wiki.PageType from a path like "wiki/entities/foo.md".
|
|
func pageTypeFromPath(path string) wiki.PageType {
|
|
parts := strings.Split(filepath.ToSlash(path), "/")
|
|
if len(parts) >= 2 {
|
|
return wiki.PageType(parts[1])
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// extractTitle returns the value of the "title" key from the YAML frontmatter
// in content, or the empty string if no title is found.
//
// Frontmatter is the region between the first two lines that consist solely of
// "---" (ignoring surrounding whitespace). Only the first 30 lines of content
// are examined. The value is returned with surrounding whitespace and any
// leading or trailing single/double quote characters removed.
//
// This is a line-based scan, not a YAML parser: keys are compared after
// trimming whitespace, so an indented "title" key matches as well.
func extractTitle(content string) string {
	const maxLines = 30

	remaining := content
	inFM := false
	for i := 0; i < maxLines && remaining != ""; i++ {
		// Peel off exactly one line per iteration so text beyond the scan
		// limit is never mistaken for part of a line (and thus of the title).
		var line string
		line, remaining, _ = strings.Cut(remaining, "\n")

		if strings.TrimSpace(line) == "---" {
			if inFM {
				break // closing delimiter: frontmatter is over
			}
			inFM = true
			continue
		}
		if !inFM {
			continue
		}

		key, val, ok := strings.Cut(line, ":")
		if ok && strings.TrimSpace(key) == "title" {
			return strings.Trim(strings.TrimSpace(val), `"'`)
		}
	}
	return ""
}
|