feat(pipeline): add fuzzy entity resolution to prevent slug proliferation

This commit is contained in:
Mathias Bergqvist
2026-04-23 15:59:36 +02:00
parent bf6f497d9d
commit e9b5cc401c
2 changed files with 178 additions and 0 deletions

View File

@@ -0,0 +1,90 @@
// ingestion/internal/pipeline/resolve_test.go
package pipeline
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
)
// TestResolve_NoMatch checks that a proposed entity page titled "New Person"
// keeps its original path when the only inventory entry is "Ryan Singer"
// (alias "Singer").
func TestResolve_NoMatch(t *testing.T) {
	proposed := []wiki.Page{
		{Path: "wiki/entities/new-person.md", Content: "---\ntitle: New Person\n---\n"},
	}
	inventory := map[wiki.PageType][]wiki.Entry{
		wiki.PageTypeEntity: {
			{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: []string{"Singer"}},
		},
	}

	got := Resolve(proposed, inventory)

	// assert.Len only records a failure and keeps going; bail out here so the
	// got[0] access below cannot panic on an unexpectedly empty result.
	if !assert.Len(t, got, 1) {
		return
	}
	assert.Equal(t, "wiki/entities/new-person.md", got[0].Path)
}
// TestResolve_TitleMatchRedirectsSlug checks that a proposed entity page whose
// front-matter title equals an inventory entry's title is returned with its
// path rewritten to that entry's slug, even though the proposed slug differs.
func TestResolve_TitleMatchRedirectsSlug(t *testing.T) {
	proposed := []wiki.Page{
		{Path: "wiki/entities/ryan-singer-the-designer.md", Content: "---\ntitle: Ryan Singer\n---\n"},
	}
	inventory := map[wiki.PageType][]wiki.Entry{
		wiki.PageTypeEntity: {
			{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: nil},
		},
	}

	got := Resolve(proposed, inventory)

	// assert.Len only records a failure and keeps going; bail out here so the
	// got[0] access below cannot panic on an unexpectedly empty result.
	if !assert.Len(t, got, 1) {
		return
	}
	assert.Equal(t, "wiki/entities/ryan-singer.md", got[0].Path)
}
// TestResolve_AliasMatchRedirectsSlug checks that a proposed entity page whose
// front-matter title equals one of an inventory entry's aliases is returned
// with its path rewritten to that entry's slug.
func TestResolve_AliasMatchRedirectsSlug(t *testing.T) {
	proposed := []wiki.Page{
		{Path: "wiki/entities/singer.md", Content: "---\ntitle: Singer\n---\n"},
	}
	inventory := map[wiki.PageType][]wiki.Entry{
		wiki.PageTypeEntity: {
			{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: []string{"Singer", "R. Singer"}},
		},
	}

	got := Resolve(proposed, inventory)

	// assert.Len only records a failure and keeps going; bail out here so the
	// got[0] access below cannot panic on an unexpectedly empty result.
	if !assert.Len(t, got, 1) {
		return
	}
	assert.Equal(t, "wiki/entities/ryan-singer.md", got[0].Path)
}
// TestResolve_NormalizationCaseAndArticles checks that a proposed concept page
// titled "The Shape Up Method" is redirected to the existing "Shape Up Method"
// entry, i.e. that a leading "The" does not prevent a match.
//
// NOTE(review): despite the name, this fixture only exercises the leading
// article; the two titles already agree in letter case.
func TestResolve_NormalizationCaseAndArticles(t *testing.T) {
	proposed := []wiki.Page{
		{Path: "wiki/concepts/the-shape-up-method.md", Content: "---\ntitle: The Shape Up Method\n---\n"},
	}
	inventory := map[wiki.PageType][]wiki.Entry{
		wiki.PageTypeConcept: {
			{Slug: "shape-up-method", Title: "Shape Up Method", Aliases: nil},
		},
	}

	got := Resolve(proposed, inventory)

	// assert.Len only records a failure and keeps going; bail out here so the
	// got[0] access below cannot panic on an unexpectedly empty result.
	if !assert.Len(t, got, 1) {
		return
	}
	assert.Equal(t, "wiki/concepts/shape-up-method.md", got[0].Path)
}
// TestResolve_OnlyMatchesSamePageType checks that a proposed page under
// wiki/concepts/ keeps its path when the only entry with the same title is
// listed under wiki.PageTypeEntity and the concept inventory is empty.
func TestResolve_OnlyMatchesSamePageType(t *testing.T) {
	proposed := []wiki.Page{
		{Path: "wiki/concepts/ryan-singer.md", Content: "---\ntitle: Ryan Singer\n---\n"},
	}
	inventory := map[wiki.PageType][]wiki.Entry{
		wiki.PageTypeEntity: {
			{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: nil},
		},
		wiki.PageTypeConcept: {},
	}

	got := Resolve(proposed, inventory)

	// assert.Len only records a failure and keeps going; bail out here so the
	// got[0] access below cannot panic on an unexpectedly empty result.
	if !assert.Len(t, got, 1) {
		return
	}
	assert.Equal(t, "wiki/concepts/ryan-singer.md", got[0].Path)
}
// TestResolve_EmptyInventory checks that Resolve returns the proposed pages
// unchanged when the inventory map has no entries at all.
func TestResolve_EmptyInventory(t *testing.T) {
	proposed := []wiki.Page{
		{Path: "wiki/entities/first.md", Content: "---\ntitle: First\n---\n"},
	}
	// The expectation is spelled out separately instead of reusing proposed:
	// if Resolve rewrote its input slice in place, comparing got against
	// proposed would still succeed and hide the change.
	want := []wiki.Page{
		{Path: "wiki/entities/first.md", Content: "---\ntitle: First\n---\n"},
	}
	inventory := map[wiki.PageType][]wiki.Entry{}

	got := Resolve(proposed, inventory)

	assert.Equal(t, want, got)
}