168 lines
5.8 KiB
Go
168 lines
5.8 KiB
Go
// ingestion/internal/pipeline/build_test.go
|
|
package pipeline
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func TestBuildPages_SourcePage(t *testing.T) {
|
|
raw := []RawPage{
|
|
{
|
|
Title: "Shape Up",
|
|
Type: "source",
|
|
Subtype: "book",
|
|
Domain: "product-strategy",
|
|
Content: "## Summary\n\nA book about shaping product work.\n",
|
|
},
|
|
}
|
|
pages, warnings := BuildPages(raw, "shape-up", "2026-04-23")
|
|
require.Len(t, pages, 1)
|
|
assert.Empty(t, warnings)
|
|
|
|
p := pages[0]
|
|
assert.Equal(t, "wiki/sources/shape-up.md", p.Path)
|
|
assert.Contains(t, p.Content, "title: 'Shape Up'")
|
|
assert.Contains(t, p.Content, "type: 'book'")
|
|
assert.Contains(t, p.Content, "domain: 'product-strategy'")
|
|
assert.Contains(t, p.Content, "date_ingested: 2026-04-23")
|
|
assert.Contains(t, p.Content, "last_updated: 2026-04-23")
|
|
assert.Contains(t, p.Content, "aliases:\n - 'Shape Up'")
|
|
assert.Contains(t, p.Content, "## Summary")
|
|
assert.True(t, strings.HasPrefix(p.Content, "---\n"), "content must start with frontmatter")
|
|
}
|
|
|
|
func TestBuildPages_ConceptPage(t *testing.T) {
|
|
raw := []RawPage{
|
|
{
|
|
Title: "Betting",
|
|
Type: "concept",
|
|
Domain: "product-strategy",
|
|
Content: "## Definition\n\nA resource allocation technique.\n",
|
|
},
|
|
}
|
|
pages, warnings := BuildPages(raw, "shape-up", "2026-04-23")
|
|
require.Len(t, pages, 1)
|
|
assert.Empty(t, warnings)
|
|
|
|
p := pages[0]
|
|
assert.Equal(t, "wiki/concepts/betting.md", p.Path)
|
|
assert.Contains(t, p.Content, "title: 'Betting'")
|
|
assert.Contains(t, p.Content, "domain: 'product-strategy'")
|
|
assert.Contains(t, p.Content, "last_updated: 2026-04-23")
|
|
assert.Contains(t, p.Content, "aliases:\n - 'Betting'")
|
|
assert.NotContains(t, p.Content, "date_ingested")
|
|
assert.Contains(t, p.Content, "## Definition")
|
|
}
|
|
|
|
func TestBuildPages_EntityPage(t *testing.T) {
|
|
raw := []RawPage{
|
|
{
|
|
Title: "Ryan Singer",
|
|
Type: "entity",
|
|
Subtype: "person",
|
|
Domain: "product-strategy",
|
|
Content: "## Description\n\nA product designer.\n",
|
|
},
|
|
}
|
|
pages, warnings := BuildPages(raw, "shape-up", "2026-04-23")
|
|
require.Len(t, pages, 1)
|
|
assert.Empty(t, warnings)
|
|
|
|
p := pages[0]
|
|
assert.Equal(t, "wiki/entities/ryan-singer.md", p.Path)
|
|
assert.Contains(t, p.Content, "title: 'Ryan Singer'")
|
|
assert.Contains(t, p.Content, "type: 'person'")
|
|
assert.Contains(t, p.Content, "domain: 'product-strategy'")
|
|
assert.Contains(t, p.Content, "last_updated: 2026-04-23")
|
|
assert.Contains(t, p.Content, "aliases:\n - 'Ryan Singer'")
|
|
assert.NotContains(t, p.Content, "date_ingested")
|
|
}
|
|
|
|
func TestBuildPages_SourceSlugUsedForSourcePage(t *testing.T) {
|
|
// LLM title differs from filename — pipeline uses sourceSlug for the source page path.
|
|
raw := []RawPage{
|
|
{Title: "FinBERT: A Pretrained Model", Type: "source", Subtype: "article", Content: "## Summary\n\nA model.\n"},
|
|
}
|
|
pages, _ := BuildPages(raw, "finbert-huggingface", "2026-04-23")
|
|
require.Len(t, pages, 1)
|
|
assert.Equal(t, "wiki/sources/finbert-huggingface.md", pages[0].Path)
|
|
}
|
|
|
|
func TestBuildPages_ConceptSlugDerivedFromTitle(t *testing.T) {
|
|
raw := []RawPage{
|
|
{Title: "Domain-Driven Design", Type: "concept", Content: "## Definition\n\nFoo.\n"},
|
|
}
|
|
pages, _ := BuildPages(raw, "some-source", "2026-04-23")
|
|
require.Len(t, pages, 1)
|
|
assert.Equal(t, "wiki/concepts/domain-driven-design.md", pages[0].Path)
|
|
}
|
|
|
|
func TestBuildPages_SourceDefaultSubtype(t *testing.T) {
|
|
// If subtype is omitted for a source, default to "article"
|
|
raw := []RawPage{
|
|
{Title: "Some Post", Type: "source", Content: "## Summary\n\nA post.\n"},
|
|
}
|
|
pages, _ := BuildPages(raw, "some-post", "2026-04-23")
|
|
require.Len(t, pages, 1)
|
|
assert.Contains(t, pages[0].Content, "type: 'article'")
|
|
}
|
|
|
|
func TestBuildPages_OmitsDomainWhenEmpty(t *testing.T) {
|
|
raw := []RawPage{
|
|
{Title: "Betting", Type: "concept", Content: "## Definition\n\nFoo.\n"},
|
|
}
|
|
pages, _ := BuildPages(raw, "src", "2026-04-23")
|
|
require.Len(t, pages, 1)
|
|
assert.NotContains(t, pages[0].Content, "domain:")
|
|
}
|
|
|
|
func TestBuildPages_MultiplePages(t *testing.T) {
|
|
raw := []RawPage{
|
|
{Title: "Shape Up", Type: "source", Subtype: "book", Content: "## Summary\n\nA book.\n"},
|
|
{Title: "Betting", Type: "concept", Content: "## Definition\n\nA technique.\n"},
|
|
{Title: "Ryan Singer", Type: "entity", Subtype: "person", Content: "## Description\n\nA designer.\n"},
|
|
}
|
|
pages, _ := BuildPages(raw, "shape-up", "2026-04-23")
|
|
require.Len(t, pages, 3)
|
|
assert.Equal(t, "wiki/sources/shape-up.md", pages[0].Path)
|
|
assert.Equal(t, "wiki/concepts/betting.md", pages[1].Path)
|
|
assert.Equal(t, "wiki/entities/ryan-singer.md", pages[2].Path)
|
|
}
|
|
|
|
func TestBuildPages_TitleWithColon(t *testing.T) {
|
|
raw := []RawPage{
|
|
{Title: "Shape Up: The Basecamp Method", Type: "source", Subtype: "book", Content: "## Summary\n\nA book.\n"},
|
|
}
|
|
pages, _ := BuildPages(raw, "shape-up", "2026-04-23")
|
|
require.Len(t, pages, 1)
|
|
// Title with colon must be quoted in YAML
|
|
assert.Contains(t, pages[0].Content, "title: 'Shape Up: The Basecamp Method'")
|
|
assert.Contains(t, pages[0].Content, "aliases:\n - 'Shape Up: The Basecamp Method'")
|
|
}
|
|
|
|
func TestBuildPages_EntityNoSubtype(t *testing.T) {
|
|
raw := []RawPage{
|
|
{Title: "Basecamp", Type: "entity", Content: "## Description\n\nA company.\n"},
|
|
}
|
|
pages, _ := BuildPages(raw, "src", "2026-04-23")
|
|
require.Len(t, pages, 1)
|
|
assert.NotContains(t, pages[0].Content, "type:")
|
|
assert.Contains(t, pages[0].Content, "title: 'Basecamp'")
|
|
}
|
|
|
|
func TestBuildPages_EmptyTitleSkippedWithWarning(t *testing.T) {
|
|
raw := []RawPage{
|
|
{Title: "", Type: "concept", Content: "## Definition\n\nFoo.\n"},
|
|
{Title: "Betting", Type: "concept", Content: "## Definition\n\nA technique.\n"},
|
|
}
|
|
pages, warnings := BuildPages(raw, "src", "2026-04-23")
|
|
require.Len(t, pages, 1, "empty-title page should be skipped")
|
|
assert.Equal(t, "wiki/concepts/betting.md", pages[0].Path)
|
|
assert.Len(t, warnings, 1)
|
|
assert.Contains(t, warnings[0], "empty title")
|
|
}
|