Files
hyperguild/ingestion/internal/graph/extract_test.go
Mathias 3084c4173d
All checks were successful
CI / Lint / Test / Vet (push) Successful in 12s
CI / Mirror to GitHub (push) Successful in 4s
fix(graph): route wiki/<flat>.md to Type=knowledge, not Type=hall with filename-as-wing
classifyByPath had a hole: paths like wiki/index.md or wiki/<slug>.md
(direct children of wiki/, no subdirectory) hit the default branch and
wrote Wing=parts[1] — which IS the filename, not a wing. Symptom in
brain_entities: rows like (slug=index, wing=index.md) and
(slug=autobe-..., wing=autobe-evaluation-pattern-....md).

Fix: when len(parts) < 3 (no subdirectory at all), fall through to
Type=knowledge and let frontmatter set wing/hall if present.

Add brain/eval/ artifacts at the same time:
- qa-2026-05.md — 20 hand-authored Q→expected-slug pairs covering the
  homelab knowledge corpus across mcp, dex, gitops, postgres, go,
  models, methodology
- score.py — calls brain_query for each pair, scores top-1 + top-3,
  emits per-question detail. BRAIN_MCP_TOKEN via env.

Pre-fix baseline against the live brain: top-1 = 20% (4/20),
top-3 = 65% (13/20). Six hard misses where the expected slug doesn't
even land in the top-5.

This eval is used to gate the phase 2 DIKW redesign (infra#62 follow-up):
if the phase 1 fixes (this parser fix + authoring 20 backlinks on the top
orphans) lift top-1 by <10 absolute points, structure is the
bottleneck and the tier redesign is justified.
2026-05-24 22:33:04 +02:00

135 lines
3.9 KiB
Go

package graph
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestExtract_HallDoc runs Extract on a document stored under
// wiki/<wing>/<hall>/ whose frontmatter sets wing, hall and title. It checks
// the entity fields and the three edges produced from the body's wikilinks,
// including the aliased [[target|label]] form, which must yield only the
// target slug.
func TestExtract_HallDoc(t *testing.T) {
	const docPath = "wiki/jepa-fx/decisions/val-vol.md"
	const doc = `---
wing: jepa-fx
hall: decisions
title: Val Vol Decision
---
# Val Vol
See also [[other-decision]] and [[parent-concept|Parent Concept]].
Linking to [[unrelated]].
`

	entity, links, ok := Extract(docPath, []byte(doc))
	require.True(t, ok)

	// Entity metadata.
	assert.Equal(t, "val-vol", entity.Slug)
	assert.Equal(t, "hall", entity.Type)
	assert.Equal(t, "jepa-fx", entity.Wing)
	assert.Equal(t, "decisions", entity.Hall)
	assert.Equal(t, "Val Vol Decision", entity.Title)

	// Link targets, in the order they appear in the document.
	require.Len(t, links, 3)
	wantTargets := []string{"other-decision", "parent-concept", "unrelated"}
	for i, want := range wantTargets {
		assert.Equal(t, want, links[i].DstSlug)
	}

	// Every edge carries the same source metadata and a positive line number.
	for i := range links {
		assert.Equal(t, "wikilink", links[i].EdgeType)
		assert.Equal(t, "val-vol", links[i].SrcSlug)
		assert.Equal(t, docPath, links[i].SrcDoc)
		assert.Greater(t, links[i].SrcLine, 0)
	}
}
// TestExtract_LegacyConceptDoc runs Extract on a document under
// wiki/concepts/ whose frontmatter carries only a title. The entity is
// expected to have Type "concept", empty Wing and Hall, and one edge for the
// single aliased wikilink in the body.
func TestExtract_LegacyConceptDoc(t *testing.T) {
	const doc = `---
title: Hash Encoding
---
# Hash Encoding
Linked to [[financial-sentiment-analysis|FSA]].
`

	entity, links, ok := Extract("wiki/concepts/hash-encoding.md", []byte(doc))
	require.True(t, ok)

	assert.Equal(t, "hash-encoding", entity.Slug)
	assert.Equal(t, "concept", entity.Type)
	assert.Empty(t, entity.Wing)
	assert.Empty(t, entity.Hall)
	assert.Equal(t, "Hash Encoding", entity.Title)

	require.Len(t, links, 1)
	onlyLink := links[0]
	assert.Equal(t, "financial-sentiment-analysis", onlyLink.DstSlug)
}
// TestExtract_KnowledgeDoc checks that a file under knowledge/ with neither
// frontmatter nor wikilinks is accepted, gets Type "knowledge" and a slug
// taken from its file name, and produces no edges.
func TestExtract_KnowledgeDoc(t *testing.T) {
	body := []byte("# No frontmatter, no links here.\n")

	entity, links, ok := Extract("knowledge/some-note.md", body)

	require.True(t, ok)
	assert.Equal(t, "some-note", entity.Slug)
	assert.Equal(t, "knowledge", entity.Type)
	assert.Empty(t, links)
}
// TestExtract_DedupesRepeatedLinkOnSameLine checks that the same wikilink
// target appearing twice on one line is reported as a single edge.
func TestExtract_DedupesRepeatedLinkOnSameLine(t *testing.T) {
	body := []byte("See [[foo]] and [[foo]] again on the same line.\n")

	_, links, ok := Extract("knowledge/dup.md", body)

	require.True(t, ok)
	require.Len(t, links, 1)
	onlyLink := links[0]
	assert.Equal(t, "foo", onlyLink.DstSlug)
}
// TestExtract_KeepsMultipleEdgesOnDifferentLines checks that the same
// wikilink target mentioned on two separate lines yields two edges with
// distinct source line numbers.
func TestExtract_KeepsMultipleEdgesOnDifferentLines(t *testing.T) {
	body := []byte("First mention [[foo]].\n\nSecond mention [[foo]].\n")

	_, links, ok := Extract("knowledge/multi.md", body)

	require.True(t, ok)
	require.Len(t, links, 2)
	firstLine, secondLine := links[0].SrcLine, links[1].SrcLine
	assert.NotEqual(t, firstLine, secondLine)
}
// TestExtract_IgnoresSelfLinks checks that a wikilink whose target matches
// the document's own file name (self.md linking to [[self]]) produces no edge.
func TestExtract_IgnoresSelfLinks(t *testing.T) {
	body := []byte("Self-reference [[self]] should be ignored.\n")

	_, links, ok := Extract("knowledge/self.md", body)

	require.True(t, ok)
	assert.Empty(t, links)
}
// TestExtract_RejectsNonMarkdown checks that Extract reports ok=false for a
// path that ends in .txt rather than .md.
func TestExtract_RejectsNonMarkdown(t *testing.T) {
	payload := []byte("anything")

	_, _, accepted := Extract("wiki/concepts/not-markdown.txt", payload)

	assert.False(t, accepted)
}
// TestExtract_LineNumbersAre1Indexed checks that a wikilink on the second
// line of the input is reported with SrcLine 2, i.e. line numbering starts
// at 1.
func TestExtract_LineNumbersAre1Indexed(t *testing.T) {
	body := []byte("line 1\nline 2 [[bar]]\n")

	_, links, ok := Extract("knowledge/lines.md", body)

	require.True(t, ok)
	require.Len(t, links, 1)
	gotLine := links[0].SrcLine
	assert.Equal(t, 2, gotLine)
}
// TestExtract_WikiRootFileIsKnowledgeNotHall is a regression test for files
// that sit directly under wiki/ with no subdirectory. Such files used to be
// classified as Type="hall" with Wing="<filename>.md", because the second
// path segment was the file itself. They are now expected to come back as
// Type="knowledge" with empty Wing and Hall when the frontmatter does not
// set them.
func TestExtract_WikiRootFileIsKnowledgeNotHall(t *testing.T) {
	body := []byte("# Index\n\n- [[foo]]\n")

	entity, _, ok := Extract("wiki/index.md", body)

	require.True(t, ok)
	assert.Equal(t, "index", entity.Slug)
	assert.Equal(t, "knowledge", entity.Type)
	assert.Empty(t, entity.Wing)
	assert.Empty(t, entity.Hall)
}
// TestExtract_WikiRootFileWithFrontmatterWingHall checks that a file directly
// under wiki/ is still typed "knowledge" while taking its Wing and Hall from
// the frontmatter.
func TestExtract_WikiRootFileWithFrontmatterWingHall(t *testing.T) {
	const doc = `---
wing: homelab
hall: facts
---
# Some root note
`

	entity, _, ok := Extract("wiki/some-note.md", []byte(doc))
	require.True(t, ok)

	assert.Equal(t, "knowledge", entity.Type)
	assert.Equal(t, "homelab", entity.Wing)
	assert.Equal(t, "facts", entity.Hall)
}