feat(graph,graphstore): M2 parse tier+topic from frontmatter, persist via Upsert (infra#72)
All checks were successful
CI / Lint / Test / Vet (push) Successful in 13s
CI / Mirror to GitHub (push) Successful in 4s

extract.go now reads `tier:` and `topic:` from YAML frontmatter, with
a path-based fallback when frontmatter is absent (the pre-M3 state on
every existing entry):

  knowledge/* → tier=knowledge
  notes/*     → tier=note
  wiki/**     → tier=note   (sources + concepts + entities are I-level)
  inbox/**, raw/**, sessions/**, clips/** → tier=inbox

Frontmatter wins when present — covers the M3-migrated case where an
entry's path may not match the tier the author chose for it.

UpsertEntity persists both columns. M1's schema already has them.

Backfill on next pod start populates tier for the whole corpus
without any file moves; M3 will follow up with the actual layout
migration and explicit frontmatter writes.
This commit is contained in:
Mathias
2026-05-25 12:35:38 +02:00
parent ea9518e712
commit d5f112b600
3 changed files with 95 additions and 3 deletions

View File

@@ -35,6 +35,12 @@ type Entity struct {
Wing string // optional; from frontmatter or path Wing string // optional; from frontmatter or path
Hall string // optional; from frontmatter or path Hall string // optional; from frontmatter or path
Title string // optional; from frontmatter Title string // optional; from frontmatter
// DIKW tier — infra#72. A `tier:` value in frontmatter is authoritative,
// but no entry carries one until the M3 migration rewrites frontmatter.
// Until then the tier is inferred from the path, so the column populates
// immediately on backfill; it stays empty only when neither frontmatter
// nor the path yields a value.
Tier string // "inbox" | "note" | "knowledge"
Topic string // kebab-slug; the thing the entry is about
} }
// Edge represents a directed relationship between two slugs. // Edge represents a directed relationship between two slugs.
@@ -67,11 +73,42 @@ func Extract(docPath string, content []byte) (Entity, []Edge, bool) {
ent := Entity{DocPath: docPath, Slug: slug} ent := Entity{DocPath: docPath, Slug: slug}
classifyByPath(&ent, docPath) classifyByPath(&ent, docPath)
readFrontmatter(&ent, content) readFrontmatter(&ent, content)
inferTierFromPath(&ent, docPath)
edges := extractEdges(docPath, slug, content) edges := extractEdges(docPath, slug, content)
return ent, edges, true return ent, edges, true
} }
// inferTierFromPath fills e.Tier from the top-level directory of docPath
// when frontmatter did not already set it. A non-empty Tier is never
// overwritten, so frontmatter always wins over the path.
//
// Only the text before the first "/" is inspected, so the mapping fires
// for root-relative, slash-separated paths such as "wiki/sources/x.md".
// The new layout has dedicated subtrees per tier (inbox/, notes/,
// knowledge/); pre-migration roots (wiki/, raw/, sessions/, clips/) get
// their best-guess mapping so the column populates on backfill before
// the M3 file moves run. Any other root leaves Tier empty.
func inferTierFromPath(e *Entity, docPath string) {
	if e.Tier != "" {
		return
	}

	// Take the first path segment without splitting the whole path. A path
	// with no "/" is treated as being its own root segment.
	root := docPath
	if i := strings.IndexByte(docPath, '/'); i >= 0 {
		root = docPath[:i]
	}

	switch root {
	case "inbox":
		e.Tier = "inbox"
	case "notes":
		e.Tier = "note"
	case "knowledge":
		e.Tier = "knowledge"
	case "wiki":
		// Pre-M3 wiki layout: sources are synth output of raw inbox
		// material (I tier); concepts + entities are reference notes
		// (also I tier); top-level wiki/<slug>.md is unstructured
		// reference too. None of these are reusable lessons (K).
		e.Tier = "note"
	case "raw", "sessions", "clips":
		e.Tier = "inbox"
	}
}
func slugFromPath(docPath string) string { func slugFromPath(docPath string) string {
base := filepath.Base(docPath) base := filepath.Base(docPath)
if !strings.HasSuffix(base, ".md") { if !strings.HasSuffix(base, ".md") {
@@ -152,6 +189,14 @@ func readFrontmatter(e *Entity, content []byte) {
if e.Hall == "" { if e.Hall == "" {
e.Hall = v e.Hall = v
} }
case "tier":
if e.Tier == "" {
e.Tier = v
}
case "topic":
if e.Topic == "" {
e.Topic = v
}
} }
} }
} }

View File

@@ -119,6 +119,51 @@ func TestExtract_WikiRootFileIsKnowledgeNotHall(t *testing.T) {
assert.Empty(t, ent.Hall) assert.Empty(t, ent.Hall)
} }
// TestExtract_TierFromFrontmatter verifies that `tier:` and `topic:`
// declared in YAML frontmatter end up on the extracted Entity.
//
// Note: the path root used here would also infer tier "knowledge" on its
// own; in the code shown alongside this test the topic value comes only
// from frontmatter, so the Topic assertion is the stricter check.
func TestExtract_TierFromFrontmatter(t *testing.T) {
	const doc = "---\n" +
		"tier: knowledge\n" +
		"topic: postgres-roles\n" +
		"title: Least-privilege migration trap\n" +
		"---\n" +
		"# body\n"

	got, _, extracted := Extract("knowledge/some-lesson.md", []byte(doc))
	require.True(t, extracted)

	assert.Equal(t, "knowledge", got.Tier)
	assert.Equal(t, "postgres-roles", got.Topic)
}
// TestExtract_TierInferredFromPath checks that, for content without
// frontmatter, Extract derives Tier from the top-level directory of the
// path. Each path runs as its own subtest so that a failed require on one
// case does not stop the remaining cases from being checked.
func TestExtract_TierInferredFromPath(t *testing.T) {
	cases := []struct {
		path string
		want string
	}{
		{"knowledge/foo.md", "knowledge"},
		{"wiki/sources/x.md", "note"},
		{"wiki/concepts/x.md", "note"},
		{"wiki/x.md", "note"},
		{"inbox/clips/x.md", "inbox"},
		{"notes/x.md", "note"},
		{"raw/x.md", "inbox"},
		{"sessions/x.md", "inbox"},
		// Top-level clips/ is a distinct root from inbox/clips/ above.
		{"clips/x.md", "inbox"},
	}
	for _, tc := range cases {
		t.Run(tc.path, func(t *testing.T) {
			ent, _, ok := Extract(tc.path, []byte("# x\n"))
			require.True(t, ok, tc.path)
			assert.Equal(t, tc.want, ent.Tier, tc.path)
		})
	}
}
// TestExtract_FrontmatterTierBeatsPathInference pins the precedence rule:
// an explicit `tier:` in frontmatter overrides the tier the path would
// otherwise imply. Here a clip stored under inbox/ has been promoted to
// "knowledge" via frontmatter, so the file's location and its declared
// tier disagree and the declared tier must win.
func TestExtract_FrontmatterTierBeatsPathInference(t *testing.T) {
	const path = "inbox/clips/x.md"
	doc := []byte(`---
tier: knowledge
---
# x
`)

	got, _, extracted := Extract(path, doc)
	require.True(t, extracted)
	assert.Equal(t, "knowledge", got.Tier)
}
func TestExtract_WikiRootFileWithFrontmatterWingHall(t *testing.T) { func TestExtract_WikiRootFileWithFrontmatterWingHall(t *testing.T) {
content := []byte(`--- content := []byte(`---
wing: homelab wing: homelab

View File

@@ -118,16 +118,18 @@ func (s *PGStore) UpsertEntity(ctx context.Context, e graph.Entity) error {
e.Type = "knowledge" e.Type = "knowledge"
} }
_, err := s.pool.Exec(ctx, ` _, err := s.pool.Exec(ctx, `
INSERT INTO brain_entities (slug, type, wing, hall, doc_path, title, updated_at) INSERT INTO brain_entities (slug, type, wing, hall, doc_path, title, tier, topic, updated_at)
VALUES ($1, $2, $3, $4, $5, $6, now()) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, now())
ON CONFLICT (slug) DO UPDATE ON CONFLICT (slug) DO UPDATE
SET type = EXCLUDED.type, SET type = EXCLUDED.type,
wing = EXCLUDED.wing, wing = EXCLUDED.wing,
hall = EXCLUDED.hall, hall = EXCLUDED.hall,
doc_path = EXCLUDED.doc_path, doc_path = EXCLUDED.doc_path,
title = EXCLUDED.title, title = EXCLUDED.title,
tier = EXCLUDED.tier,
topic = EXCLUDED.topic,
updated_at = now() updated_at = now()
`, e.Slug, e.Type, e.Wing, e.Hall, e.DocPath, e.Title) `, e.Slug, e.Type, e.Wing, e.Hall, e.DocPath, e.Title, e.Tier, e.Topic)
if err != nil { if err != nil {
return fmt.Errorf("upsert entity %q: %w", e.Slug, err) return fmt.Errorf("upsert entity %q: %w", e.Slug, err)
} }