extract.go now reads `tier:` and `topic:` from YAML frontmatter, with a path-based fallback when frontmatter is absent (the pre-M3 state of every existing entry):

- knowledge/* → tier=knowledge
- notes/* → tier=note
- wiki/** → tier=note (sources + concepts + entities are I-level)
- inbox/**, raw/**, sessions/**, clips/** → tier=inbox

Frontmatter wins when present — this covers the M3-migrated case where an entry's path may not match the tier the author chose for it.

UpsertEntity persists both columns; M1's schema already has them. The backfill on the next pod start populates tier for the whole corpus without any file moves; M3 will follow up with the actual layout migration and explicit frontmatter writes.
180 lines
5.1 KiB
Go
180 lines
5.1 KiB
Go
package graph
|
|
|
|
import (
|
|
"testing"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func TestExtract_HallDoc(t *testing.T) {
|
|
content := []byte(`---
|
|
wing: jepa-fx
|
|
hall: decisions
|
|
title: Val Vol Decision
|
|
---
|
|
# Val Vol
|
|
|
|
See also [[other-decision]] and [[parent-concept|Parent Concept]].
|
|
|
|
Linking to [[unrelated]].
|
|
`)
|
|
|
|
ent, edges, ok := Extract("wiki/jepa-fx/decisions/val-vol.md", content)
|
|
require.True(t, ok)
|
|
assert.Equal(t, "val-vol", ent.Slug)
|
|
assert.Equal(t, "hall", ent.Type)
|
|
assert.Equal(t, "jepa-fx", ent.Wing)
|
|
assert.Equal(t, "decisions", ent.Hall)
|
|
assert.Equal(t, "Val Vol Decision", ent.Title)
|
|
|
|
require.Len(t, edges, 3)
|
|
assert.Equal(t, "other-decision", edges[0].DstSlug)
|
|
assert.Equal(t, "parent-concept", edges[1].DstSlug)
|
|
assert.Equal(t, "unrelated", edges[2].DstSlug)
|
|
for _, e := range edges {
|
|
assert.Equal(t, "wikilink", e.EdgeType)
|
|
assert.Equal(t, "val-vol", e.SrcSlug)
|
|
assert.Equal(t, "wiki/jepa-fx/decisions/val-vol.md", e.SrcDoc)
|
|
assert.Greater(t, e.SrcLine, 0)
|
|
}
|
|
}
|
|
|
|
func TestExtract_LegacyConceptDoc(t *testing.T) {
|
|
content := []byte(`---
|
|
title: Hash Encoding
|
|
---
|
|
# Hash Encoding
|
|
|
|
Linked to [[financial-sentiment-analysis|FSA]].
|
|
`)
|
|
ent, edges, ok := Extract("wiki/concepts/hash-encoding.md", content)
|
|
require.True(t, ok)
|
|
assert.Equal(t, "hash-encoding", ent.Slug)
|
|
assert.Equal(t, "concept", ent.Type)
|
|
assert.Empty(t, ent.Wing)
|
|
assert.Empty(t, ent.Hall)
|
|
assert.Equal(t, "Hash Encoding", ent.Title)
|
|
|
|
require.Len(t, edges, 1)
|
|
assert.Equal(t, "financial-sentiment-analysis", edges[0].DstSlug)
|
|
}
|
|
|
|
func TestExtract_KnowledgeDoc(t *testing.T) {
|
|
content := []byte("# No frontmatter, no links here.\n")
|
|
ent, edges, ok := Extract("knowledge/some-note.md", content)
|
|
require.True(t, ok)
|
|
assert.Equal(t, "some-note", ent.Slug)
|
|
assert.Equal(t, "knowledge", ent.Type)
|
|
assert.Empty(t, edges)
|
|
}
|
|
|
|
func TestExtract_DedupesRepeatedLinkOnSameLine(t *testing.T) {
|
|
content := []byte("See [[foo]] and [[foo]] again on the same line.\n")
|
|
_, edges, ok := Extract("knowledge/dup.md", content)
|
|
require.True(t, ok)
|
|
require.Len(t, edges, 1)
|
|
assert.Equal(t, "foo", edges[0].DstSlug)
|
|
}
|
|
|
|
func TestExtract_KeepsMultipleEdgesOnDifferentLines(t *testing.T) {
|
|
content := []byte("First mention [[foo]].\n\nSecond mention [[foo]].\n")
|
|
_, edges, ok := Extract("knowledge/multi.md", content)
|
|
require.True(t, ok)
|
|
require.Len(t, edges, 2)
|
|
assert.NotEqual(t, edges[0].SrcLine, edges[1].SrcLine)
|
|
}
|
|
|
|
func TestExtract_IgnoresSelfLinks(t *testing.T) {
|
|
content := []byte("Self-reference [[self]] should be ignored.\n")
|
|
_, edges, ok := Extract("knowledge/self.md", content)
|
|
require.True(t, ok)
|
|
assert.Empty(t, edges)
|
|
}
|
|
|
|
func TestExtract_RejectsNonMarkdown(t *testing.T) {
|
|
_, _, ok := Extract("wiki/concepts/not-markdown.txt", []byte("anything"))
|
|
assert.False(t, ok)
|
|
}
|
|
|
|
func TestExtract_LineNumbersAre1Indexed(t *testing.T) {
|
|
content := []byte("line 1\nline 2 [[bar]]\n")
|
|
_, edges, ok := Extract("knowledge/lines.md", content)
|
|
require.True(t, ok)
|
|
require.Len(t, edges, 1)
|
|
assert.Equal(t, 2, edges[0].SrcLine)
|
|
}
|
|
|
|
// Files directly under wiki/ (no subdirectory) used to land
|
|
// Type="hall" Wing="<filename>.md" because the path's second segment
|
|
// was the file itself. The fix routes them to Type="knowledge" with
|
|
// empty Wing/Hall and lets frontmatter set them if present.
|
|
func TestExtract_WikiRootFileIsKnowledgeNotHall(t *testing.T) {
|
|
content := []byte("# Index\n\n- [[foo]]\n")
|
|
ent, _, ok := Extract("wiki/index.md", content)
|
|
require.True(t, ok)
|
|
assert.Equal(t, "index", ent.Slug)
|
|
assert.Equal(t, "knowledge", ent.Type)
|
|
assert.Empty(t, ent.Wing)
|
|
assert.Empty(t, ent.Hall)
|
|
}
|
|
|
|
func TestExtract_TierFromFrontmatter(t *testing.T) {
|
|
content := []byte(`---
|
|
tier: knowledge
|
|
topic: postgres-roles
|
|
title: Least-privilege migration trap
|
|
---
|
|
# body
|
|
`)
|
|
ent, _, ok := Extract("knowledge/some-lesson.md", content)
|
|
require.True(t, ok)
|
|
assert.Equal(t, "knowledge", ent.Tier)
|
|
assert.Equal(t, "postgres-roles", ent.Topic)
|
|
}
|
|
|
|
func TestExtract_TierInferredFromPath(t *testing.T) {
|
|
cases := []struct {
|
|
path string
|
|
want string
|
|
}{
|
|
{"knowledge/foo.md", "knowledge"},
|
|
{"wiki/sources/x.md", "note"},
|
|
{"wiki/concepts/x.md", "note"},
|
|
{"wiki/x.md", "note"},
|
|
{"inbox/clips/x.md", "inbox"},
|
|
{"notes/x.md", "note"},
|
|
{"raw/x.md", "inbox"},
|
|
{"sessions/x.md", "inbox"},
|
|
}
|
|
for _, tc := range cases {
|
|
ent, _, ok := Extract(tc.path, []byte("# x\n"))
|
|
require.True(t, ok, tc.path)
|
|
assert.Equal(t, tc.want, ent.Tier, tc.path)
|
|
}
|
|
}
|
|
|
|
func TestExtract_FrontmatterTierBeatsPathInference(t *testing.T) {
|
|
// A clip explicitly promoted via frontmatter wins over the path's
|
|
// inbox inference. Catches the case where a file has been moved
|
|
// to a new location but frontmatter hasn't been updated.
|
|
content := []byte("---\ntier: knowledge\n---\n# x\n")
|
|
ent, _, ok := Extract("inbox/clips/x.md", content)
|
|
require.True(t, ok)
|
|
assert.Equal(t, "knowledge", ent.Tier)
|
|
}
|
|
|
|
func TestExtract_WikiRootFileWithFrontmatterWingHall(t *testing.T) {
|
|
content := []byte(`---
|
|
wing: homelab
|
|
hall: facts
|
|
---
|
|
# Some root note
|
|
`)
|
|
ent, _, ok := Extract("wiki/some-note.md", content)
|
|
require.True(t, ok)
|
|
assert.Equal(t, "knowledge", ent.Type)
|
|
assert.Equal(t, "homelab", ent.Wing)
|
|
assert.Equal(t, "facts", ent.Hall)
|
|
}
|