// Package graph extracts entity + edge records from brain markdown
// documents for the brain_entities / brain_edges relational graph.
//
// The extractor is pure: it takes markdown bytes and a document path and
// returns the entity (one per doc) and the wikilink edges (zero or more)
// it found, with source line numbers so the graph store can record
// provenance.
//
// Edge types in v1: only "wikilink" — derived from [[slug]] and
// [[slug|Display]] occurrences in the body. Section-header edges are
// deferred (see infra#62 grill addendum).
package graph

import (
	"bufio"
	"bytes"
	"path/filepath"
	"regexp"
	"strings"
)

// Entity represents one brain document for graph indexing.
//
// Slug is the basename without ".md" — the same identity used by
// wiki canonicalization and the wikilink target syntax.
//
// Type categorises the doc into a coarse bucket so callers can filter
// graph traversals (e.g. "only entity nodes"). When the doc lives
// under brain/wiki/<wing>/<hall>/, Wing and Hall capture the
// taxonomy; otherwise they're empty (legacy brain/knowledge/ docs)
// unless the document's frontmatter supplies them.
type Entity struct {
	DocPath string // forward-slash, relative to brainDir
	Slug    string // basename of DocPath without the ".md" suffix
	Type    string // "concept" | "entity" | "source" | "hall" | "knowledge"
	Wing    string // optional; from frontmatter or path
	Hall    string // optional; from frontmatter or path
	Title   string // optional; from frontmatter
}

// Edge represents a directed relationship between two slugs.
//
// SrcLine is the 1-indexed line in the source document where the link
// was found, so callers can re-find the linking text after an edit.
type Edge struct {
	SrcDoc   string // forward-slash, relative to brainDir
	SrcSlug  string // == Entity.Slug for SrcDoc
	DstSlug  string // link target, whitespace-trimmed
	EdgeType string // "wikilink" in v1
	SrcLine  int    // 1-indexed
}

// linkRE matches both [[slug]] and [[slug|Display Name]] wikilinks.
// Group 1 is the slug; group 2 (if present) is the display.
var linkRE = regexp.MustCompile(`\[\[([^\]|]+)(?:\|([^\]]+))?\]\]`) // Extract parses one markdown document and returns its Entity plus the // outgoing wikilink Edges. docPath is forward-slash, relative to // brainDir; content is the raw markdown bytes. // // Returns ok=false when docPath does not yield a usable slug (e.g. // non-markdown file slipped through). func Extract(docPath string, content []byte) (Entity, []Edge, bool) { slug := slugFromPath(docPath) if slug == "" { return Entity{}, nil, false } ent := Entity{DocPath: docPath, Slug: slug} classifyByPath(&ent, docPath) readFrontmatter(&ent, content) edges := extractEdges(docPath, slug, content) return ent, edges, true } func slugFromPath(docPath string) string { base := filepath.Base(docPath) if !strings.HasSuffix(base, ".md") { return "" } return strings.TrimSuffix(base, ".md") } // classifyByPath fills Type / Wing / Hall from the path layout when the // doc lives under brain/wiki/. Layout: wiki///.md // or wiki//.md for the legacy concept/entity/source dirs. func classifyByPath(e *Entity, docPath string) { parts := strings.Split(docPath, "/") if len(parts) < 2 || parts[0] != "wiki" { e.Type = "knowledge" return } switch parts[1] { case "concepts": e.Type = "concept" case "entities": e.Type = "entity" case "sources": e.Type = "source" default: // wiki///.md e.Type = "hall" e.Wing = parts[1] if len(parts) >= 4 { e.Hall = parts[2] } } } // readFrontmatter pulls title/wing/hall from a YAML frontmatter block. // Frontmatter is optional; missing fields leave the entity unchanged. 
func readFrontmatter(e *Entity, content []byte) { scanner := bufio.NewScanner(bytes.NewReader(content)) inFM := false for scanner.Scan() { line := scanner.Text() if strings.TrimSpace(line) == "---" { if !inFM { inFM = true continue } return } if !inFM { return } key, val, ok := strings.Cut(line, ":") if !ok { continue } v := strings.Trim(strings.TrimSpace(val), `"'`) switch strings.TrimSpace(key) { case "title": if e.Title == "" { e.Title = v } case "wing": if e.Wing == "" { e.Wing = v } case "hall": if e.Hall == "" { e.Hall = v } } } } func extractEdges(docPath, srcSlug string, content []byte) []Edge { var edges []Edge seen := make(map[string]struct{}) // dedupe (dst, line) scanner := bufio.NewScanner(bytes.NewReader(content)) line := 0 for scanner.Scan() { line++ matches := linkRE.FindAllStringSubmatch(scanner.Text(), -1) for _, m := range matches { dst := strings.TrimSpace(m[1]) if dst == "" || dst == srcSlug { continue } key := dst + "|" + itoa(line) if _, dup := seen[key]; dup { continue } seen[key] = struct{}{} edges = append(edges, Edge{ SrcDoc: docPath, SrcSlug: srcSlug, DstSlug: dst, EdgeType: "wikilink", SrcLine: line, }) } } return edges } // itoa avoids the fmt dependency on a hot path. Single-digit fast path // keeps overhead negligible for typical line counts. func itoa(n int) string { if n == 0 { return "0" } var buf [20]byte i := len(buf) neg := n < 0 if neg { n = -n } for n > 0 { i-- buf[i] = byte('0' + n%10) n /= 10 } if neg { i-- buf[i] = '-' } return string(buf[i:]) }