feat(graph): add brain_entities + brain_edges store and wikilink parser
Foundation for Track A (GraphRAG on top of existing wiki). Two new packages, both unwired — service behaviour unchanged until commit 2 hooks the pipeline. - internal/graph: pure parser. Extract() walks markdown + frontmatter and emits one Entity + N wikilink Edges per doc. Dedupes per (dst, line), ignores self-references, classifies hall/concept/entity/source/knowledge from path layout. - internal/graphstore: pgx-backed PGStore mirroring vectorstore's shape. Idempotent Init() creates brain_entities + brain_edges with indexes on src_slug, dst_slug, src_doc, wing, type. Operations: UpsertEntity, ReplaceEdgesForDoc (tx), DeleteByDoc, Neighbors, Subgraph (recursive CTE, depth ≤6), Path (shortest path, depth ≤8). Schema lives on the shared postgres18 instance alongside the brain_embeddings table — no new datastore. See docs/superpowers/specs/2026-05-homelab-training-graph-next-step.md in infra repo + infra#62. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
106
ingestion/internal/graph/extract_test.go
Normal file
106
ingestion/internal/graph/extract_test.go
Normal file
@@ -0,0 +1,106 @@
|
||||
package graph
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestExtract_HallDoc(t *testing.T) {
|
||||
content := []byte(`---
|
||||
wing: jepa-fx
|
||||
hall: decisions
|
||||
title: Val Vol Decision
|
||||
---
|
||||
# Val Vol
|
||||
|
||||
See also [[other-decision]] and [[parent-concept|Parent Concept]].
|
||||
|
||||
Linking to [[unrelated]].
|
||||
`)
|
||||
|
||||
ent, edges, ok := Extract("wiki/jepa-fx/decisions/val-vol.md", content)
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "val-vol", ent.Slug)
|
||||
assert.Equal(t, "hall", ent.Type)
|
||||
assert.Equal(t, "jepa-fx", ent.Wing)
|
||||
assert.Equal(t, "decisions", ent.Hall)
|
||||
assert.Equal(t, "Val Vol Decision", ent.Title)
|
||||
|
||||
require.Len(t, edges, 3)
|
||||
assert.Equal(t, "other-decision", edges[0].DstSlug)
|
||||
assert.Equal(t, "parent-concept", edges[1].DstSlug)
|
||||
assert.Equal(t, "unrelated", edges[2].DstSlug)
|
||||
for _, e := range edges {
|
||||
assert.Equal(t, "wikilink", e.EdgeType)
|
||||
assert.Equal(t, "val-vol", e.SrcSlug)
|
||||
assert.Equal(t, "wiki/jepa-fx/decisions/val-vol.md", e.SrcDoc)
|
||||
assert.Greater(t, e.SrcLine, 0)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtract_LegacyConceptDoc(t *testing.T) {
|
||||
content := []byte(`---
|
||||
title: Hash Encoding
|
||||
---
|
||||
# Hash Encoding
|
||||
|
||||
Linked to [[financial-sentiment-analysis|FSA]].
|
||||
`)
|
||||
ent, edges, ok := Extract("wiki/concepts/hash-encoding.md", content)
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "hash-encoding", ent.Slug)
|
||||
assert.Equal(t, "concept", ent.Type)
|
||||
assert.Empty(t, ent.Wing)
|
||||
assert.Empty(t, ent.Hall)
|
||||
assert.Equal(t, "Hash Encoding", ent.Title)
|
||||
|
||||
require.Len(t, edges, 1)
|
||||
assert.Equal(t, "financial-sentiment-analysis", edges[0].DstSlug)
|
||||
}
|
||||
|
||||
func TestExtract_KnowledgeDoc(t *testing.T) {
|
||||
content := []byte("# No frontmatter, no links here.\n")
|
||||
ent, edges, ok := Extract("knowledge/some-note.md", content)
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "some-note", ent.Slug)
|
||||
assert.Equal(t, "knowledge", ent.Type)
|
||||
assert.Empty(t, edges)
|
||||
}
|
||||
|
||||
func TestExtract_DedupesRepeatedLinkOnSameLine(t *testing.T) {
|
||||
content := []byte("See [[foo]] and [[foo]] again on the same line.\n")
|
||||
_, edges, ok := Extract("knowledge/dup.md", content)
|
||||
require.True(t, ok)
|
||||
require.Len(t, edges, 1)
|
||||
assert.Equal(t, "foo", edges[0].DstSlug)
|
||||
}
|
||||
|
||||
func TestExtract_KeepsMultipleEdgesOnDifferentLines(t *testing.T) {
|
||||
content := []byte("First mention [[foo]].\n\nSecond mention [[foo]].\n")
|
||||
_, edges, ok := Extract("knowledge/multi.md", content)
|
||||
require.True(t, ok)
|
||||
require.Len(t, edges, 2)
|
||||
assert.NotEqual(t, edges[0].SrcLine, edges[1].SrcLine)
|
||||
}
|
||||
|
||||
func TestExtract_IgnoresSelfLinks(t *testing.T) {
|
||||
content := []byte("Self-reference [[self]] should be ignored.\n")
|
||||
_, edges, ok := Extract("knowledge/self.md", content)
|
||||
require.True(t, ok)
|
||||
assert.Empty(t, edges)
|
||||
}
|
||||
|
||||
func TestExtract_RejectsNonMarkdown(t *testing.T) {
|
||||
_, _, ok := Extract("wiki/concepts/not-markdown.txt", []byte("anything"))
|
||||
assert.False(t, ok)
|
||||
}
|
||||
|
||||
func TestExtract_LineNumbersAre1Indexed(t *testing.T) {
|
||||
content := []byte("line 1\nline 2 [[bar]]\n")
|
||||
_, edges, ok := Extract("knowledge/lines.md", content)
|
||||
require.True(t, ok)
|
||||
require.Len(t, edges, 1)
|
||||
assert.Equal(t, 2, edges[0].SrcLine)
|
||||
}
|
||||
Reference in New Issue
Block a user