// ingestion/internal/pipeline/links.go package pipeline import ( "fmt" "path/filepath" "regexp" "strings" "github.com/mathiasbq/hyperguild/ingestion/internal/wiki" ) // plainLinkRE matches [[Display Name]] — wikilinks without a slug pipe. // It does NOT match [[slug|Display]] (those already have a pipe). var plainLinkRE = regexp.MustCompile(`\[\[([^\]|]+)\]\]`) // CanonicalizeLinks converts [[Display Name]] wikilinks to [[slug|Display Name]] // using a title→slug map built from the inventory and current batch. // Unknown titles are left as-is and returned as warnings. func CanonicalizeLinks(pages []wiki.Page, inventory map[wiki.PageType][]wiki.Entry) ([]wiki.Page, []string) { titleToSlug := buildTitleMap(pages, inventory) var allWarnings []string out := make([]wiki.Page, len(pages)) for i, p := range pages { newContent, warnings := canonicalizeContent(p.Content, titleToSlug) p.Content = newContent out[i] = p allWarnings = append(allWarnings, warnings...) } return out, allWarnings } // buildTitleMap builds a lowercase-title → slug map from inventory and current batch. // Current batch entries take precedence over inventory (they may be updates). func buildTitleMap(pages []wiki.Page, inventory map[wiki.PageType][]wiki.Entry) map[string]string { m := make(map[string]string) for _, entries := range inventory { for _, e := range entries { m[strings.ToLower(e.Title)] = e.Slug } } // Current batch overrides inventory for _, p := range pages { title := extractTitle(p.Content) slug := strings.TrimSuffix(filepath.Base(p.Path), ".md") if title != "" && slug != "" { m[strings.ToLower(title)] = slug } } return m } func canonicalizeContent(content string, titleToSlug map[string]string) (string, []string) { var warnings []string result := plainLinkRE.ReplaceAllStringFunc(content, func(match string) string { sub := plainLinkRE.FindStringSubmatch(match) if len(sub) < 2 { return match } displayName := sub[1] slug, ok := titleToSlug[strings.ToLower(displayName)] if !ok { warnings = append(warnings, fmt.Sprintf("unknown wikilink: [[%s]]", displayName)) return match } return "[[" + slug + "|" + displayName + "]]" }) return result, warnings }