feat(pipeline): replace ParsePages with ParseRawPages + RawPage type

Strips slug authority from the LLM. The new RawPage type carries only
{title, type, subtype, domain, content} — no paths or frontmatter.
Pipeline will derive slugs deterministically (Task 4).

pipeline.go gets a temporary bridge stub (TODO task4) to keep the
package compiling between tasks.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mathias Bergqvist
2026-04-23 18:41:33 +02:00
parent 989f375aec
commit 26b5636b43
3 changed files with 55 additions and 29 deletions

View File

@@ -8,39 +8,54 @@ import (
"github.com/stretchr/testify/require"
)
func TestParsePages_ValidJSON(t *testing.T) {
input := `[{"path":"wiki/sources/foo.md","content":"# Foo"},{"path":"wiki/concepts/bar.md","content":"# Bar"}]`
pages, warnings := ParsePages(input)
func TestParseRawPages_ValidJSON(t *testing.T) {
input := `[{"title":"Shape Up","type":"source","subtype":"book","domain":"product-strategy","content":"## Summary\n\nFoo."},{"title":"Betting","type":"concept","content":"## Definition\n\nA technique."}]`
pages, warnings := ParseRawPages(input)
require.Len(t, pages, 2)
assert.Empty(t, warnings)
assert.Equal(t, "wiki/sources/foo.md", pages[0].Path)
assert.Equal(t, "wiki/concepts/bar.md", pages[1].Path)
assert.Equal(t, "Shape Up", pages[0].Title)
assert.Equal(t, "source", pages[0].Type)
assert.Equal(t, "book", pages[0].Subtype)
assert.Equal(t, "product-strategy", pages[0].Domain)
assert.Equal(t, "Betting", pages[1].Title)
assert.Equal(t, "concept", pages[1].Type)
assert.Empty(t, pages[1].Subtype)
}
func TestParsePages_StripsFences(t *testing.T) {
input := "```json\n[{\"path\":\"wiki/sources/foo.md\",\"content\":\"# Foo\"}]\n```"
pages, warnings := ParsePages(input)
assert.Len(t, pages, 1)
assert.Empty(t, warnings)
}
func TestParsePages_TruncationRecovery(t *testing.T) {
input := `[{"path":"wiki/sources/foo.md","content":"# Foo"},{"path":"wiki/concepts/bar.md","content":"trunc`
pages, warnings := ParsePages(input)
func TestParseRawPages_StripsFences(t *testing.T) {
input := "```json\n[{\"title\":\"Foo\",\"type\":\"concept\",\"content\":\"## Definition\\n\\nFoo.\"}]\n```"
pages, warnings := ParseRawPages(input)
require.Len(t, pages, 1)
assert.Equal(t, "wiki/sources/foo.md", pages[0].Path)
assert.Empty(t, warnings)
assert.Equal(t, "Foo", pages[0].Title)
}
func TestParseRawPages_TruncationRecovery(t *testing.T) {
input := `[{"title":"Foo","type":"concept","content":"## Definition\n\nFoo."},{"title":"Bar","type":"concept","content":"trunc`
pages, warnings := ParseRawPages(input)
require.Len(t, pages, 1)
assert.Equal(t, "Foo", pages[0].Title)
assert.NotEmpty(t, warnings)
}
func TestParsePages_EmptyInput(t *testing.T) {
pages, warnings := ParsePages("")
func TestParseRawPages_EmptyInput(t *testing.T) {
pages, warnings := ParseRawPages("")
assert.Empty(t, pages)
assert.NotEmpty(t, warnings)
}
func TestParsePages_PlainFence(t *testing.T) {
input := "```\n[{\"path\":\"wiki/sources/foo.md\",\"content\":\"ok\"}]\n```"
pages, warnings := ParsePages(input)
assert.Len(t, pages, 1)
func TestParseRawPages_PlainFence(t *testing.T) {
input := "```\n[{\"title\":\"Foo\",\"type\":\"concept\",\"content\":\"ok\"}]\n```"
pages, warnings := ParseRawPages(input)
require.Len(t, pages, 1)
assert.Empty(t, warnings)
}
func TestParseRawPages_MissingTitle(t *testing.T) {
// Missing title — still parsed, Title is empty string
input := `[{"type":"concept","content":"## Definition\n\nFoo."}]`
pages, warnings := ParseRawPages(input)
require.Len(t, pages, 1)
assert.Empty(t, warnings)
assert.Empty(t, pages[0].Title)
}