Files
hyperguild/ingestion/internal/pipeline/build.go

89 lines
2.1 KiB
Go

// ingestion/internal/pipeline/build.go
package pipeline
import (
"fmt"
"strings"
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
)
// BuildPages turns every RawPage produced by the LLM into a wiki.Page, in the
// same order as the input. Each page gets its slug, its path, and a YAML
// frontmatter header prepended to its content (see buildPage).
//
// sourceSlug is the slug of the source being ingested; it is derived from the
// filename rather than from the LLM-provided title. date is forwarded unchanged
// to the frontmatter. The result is always a non-nil slice, even when rawPages
// is empty.
func BuildPages(rawPages []RawPage, sourceSlug, date string) []wiki.Page {
	pages := make([]wiki.Page, len(rawPages))
	for i := range rawPages {
		pages[i] = buildPage(rawPages[i], sourceSlug, date)
	}
	return pages
}
// buildPage assembles a single wiki.Page from rp.
//
// The path is "<dir>/<slug>.md", where:
//   - dir is "wiki/sources", "wiki/concepts" or "wiki/entities" for the three
//     known types, and "wiki/<rp.Type>" for any other type;
//   - slug is sourceSlug for "source" pages and wiki.Slug(rp.Title) otherwise.
//
// The content is the frontmatter from buildFrontmatter, a blank line, and then
// rp.Content.
//
// NOTE(review): for unknown types rp.Type is placed into the path as-is; if the
// LLM can emit arbitrary type strings this may deserve validation — confirm
// against the caller.
func buildPage(rp RawPage, sourceSlug, date string) wiki.Page {
	// Start from the generic layout and override it for the known types.
	dir := "wiki/" + rp.Type
	switch rp.Type {
	case "source":
		dir = "wiki/sources"
	case "concept":
		dir = "wiki/concepts"
	case "entity":
		dir = "wiki/entities"
	}

	// Only source pages use the filename-derived slug; everything else is
	// slugged from its title.
	name := sourceSlug
	if rp.Type != "source" {
		name = wiki.Slug(rp.Title)
	}

	return wiki.Page{
		Path:    dir + "/" + name + ".md",
		Content: buildFrontmatter(rp, date) + "\n" + rp.Content,
	}
}
// buildFrontmatter renders the YAML frontmatter block for rp: an opening "---"
// line, the keys below, and a closing "---" line followed by a newline.
//
// Keys are always emitted in this order: title, type, domain, date_ingested,
// last_updated, aliases. Which of them appear depends on rp.Type:
//   - "source": type is always written (rp.Subtype, or "article" when it is
//     empty) and date_ingested is written alongside last_updated;
//   - "entity": type is written only when rp.Subtype is non-empty;
//   - any other type: no type key.
//
// domain is written only when rp.Domain is non-empty. date is written verbatim
// for both date keys. aliases always holds a single entry, the title.
//
// Title, subtype and domain originate from LLM output, so they are passed
// through yamlScalar: values that would be misparsed as plain scalars (for
// example "Go: The Language", "# Notes", "true", or text containing a newline)
// are double-quoted instead of corrupting the frontmatter. Values that are
// already safe are written exactly as before.
func buildFrontmatter(rp RawPage, date string) string {
	// Resolve the optional "type" value up front; empty means "omit the key".
	var typ string
	switch rp.Type {
	case "source":
		typ = rp.Subtype
		if typ == "" {
			typ = "article"
		}
	case "entity":
		typ = rp.Subtype
	}

	title := yamlScalar(rp.Title)

	var sb strings.Builder
	sb.WriteString("---\n")
	fmt.Fprintf(&sb, "title: %s\n", title)
	if typ != "" {
		fmt.Fprintf(&sb, "type: %s\n", yamlScalar(typ))
	}
	if rp.Domain != "" {
		fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
	}
	if rp.Type == "source" {
		fmt.Fprintf(&sb, "date_ingested: %s\n", date)
	}
	fmt.Fprintf(&sb, "last_updated: %s\n", date)
	fmt.Fprintf(&sb, "aliases:\n - %s\n", title)
	sb.WriteString("---\n")
	return sb.String()
}

// yamlScalar returns s in a form that a YAML parser reads back as the string s.
// Values that are unambiguous as plain scalars are returned unchanged; anything
// else is written as a double-quoted scalar using Go's %q escaping, whose
// escape sequences (\n, \t, \", \\, \xNN, \uNNNN, \UNNNNNNNN, ...) are all
// valid inside YAML double quotes.
func yamlScalar(s string) string {
	if yamlPlainSafe(s) {
		return s
	}
	return fmt.Sprintf("%q", s)
}

// yamlPlainSafe reports whether s can be written as an unquoted YAML block
// scalar and still be read back as the identical string. It is deliberately
// conservative: a false negative only costs a pair of quotes.
func yamlPlainSafe(s string) bool {
	// An empty plain scalar reads as null, and surrounding whitespace is
	// stripped by the parser.
	if s == "" || strings.TrimSpace(s) != s {
		return false
	}

	// Words that YAML 1.1/1.2 resolve to null or booleans rather than strings.
	switch strings.ToLower(s) {
	case "null", "~", "true", "false", "yes", "no", "on", "off", "y", "n":
		return false
	}

	// A leading indicator character starts some other YAML construct, and a
	// leading digit, sign or dot may resolve to a number or timestamp.
	const unsafeLead = "-?:,[]{}#&*!|>'\"%@`+.0123456789"
	if strings.IndexByte(unsafeLead, s[0]) >= 0 {
		return false
	}

	// ": " (or a trailing ":") turns the value into a mapping key, and " #"
	// starts a comment.
	if strings.HasSuffix(s, ":") || strings.Contains(s, ": ") || strings.Contains(s, " #") {
		return false
	}

	// Control characters, line/paragraph separators and invalid UTF-8 (seen as
	// U+FFFD while ranging) cannot appear in a single-line plain scalar.
	for _, r := range s {
		if r < 0x20 || r == 0x7f || r == 0x85 || r == 0x2028 || r == 0x2029 || r == '\uFFFD' {
			return false
		}
	}
	return true
}