Files
hyperguild/ingestion/internal/search/search.go
Mathias 75685e7b67
All checks were successful
CI / Lint / Test / Vet (push) Successful in 11s
CI / Mirror to GitHub (push) Successful in 4s
feat(brain): structured wing/hall taxonomy + obsidian-compatible layout
Adds a two-dimensional address (wing, hall) to brain notes. A wing is a
topic domain (e.g. jepa-fx, hyperguild); a hall is one of a closed
vocabulary of memory types (facts, decisions, failures, hypotheses,
sources). Notes route to brain/wiki/<wing>/<hall>/<slug>.md with
wing/hall/created_at YAML frontmatter, making the directory a valid
Obsidian vault.

Changes:
- new package ingestion/internal/brain (NotePath, ValidHalls, Sanitise,
  BuildWingIndex, BuildAllWingIndexes)
- api.WriteNote refactored to WriteNoteOptions; wing+hall routes to
  brain/wiki/, otherwise falls back to brain/knowledge/ (legacy)
- search.Query → QueryOptions with optional Wing/Hall filtering; Result
  carries wing/hall extracted from frontmatter or path segments
- MCP tools brain_write and brain_query gain optional wing/hall params
  (hall enum-validated); new brain_index tool regenerates _index.md MOC
- POST /index REST endpoint mirrors brain_index
- brain_write auto-rebuilds the wing's _index.md after a wing+hall write
- scripts/migrate-brain-halls.sh migrates flat brain/wiki/{concepts,entities}/
  into the new layout (dry-run by default, --commit applies)

All existing tests pass; new tests cover wing/hall write routing, scope
filtering, invalid hall rejection, _index.md generation, and migration
script paths.

Closes hyperguild#1.
2026-05-18 20:47:08 +02:00

213 lines
5.0 KiB
Go

// ingestion/internal/search/search.go
package search
import (
"bufio"
"fmt"
"log/slog"
"os"
"path/filepath"
"sort"
"strings"
"github.com/mathiasbq/hyperguild/ingestion/internal/brain"
)
// Result describes one note that matched a search.
type Result struct {
	// Path is the note's slash-separated location relative to the brain
	// directory.
	Path string `json:"path"`
	// Title is the note's frontmatter title, or its file name without the
	// ".md" extension when no title is declared.
	Title string `json:"title"`
	// Excerpt is a leading portion of the note body.
	Excerpt string `json:"excerpt"`
	// Score is the total number of query-term occurrences found in the note.
	Score int `json:"score"`
	// Wing is the note's topic domain; left out of the JSON when empty.
	Wing string `json:"wing,omitempty"`
	// Hall is the note's memory type; left out of the JSON when empty.
	Hall string `json:"hall,omitempty"`
}
// QueryOptions carries the parameters of a single Query call.
//
// The Wing and Hall fields select which directories are walked:
//   - both empty: the legacy roots brain/knowledge/ and brain/wiki/;
//   - Wing only: brain/wiki/<wing>/;
//   - Wing and Hall: brain/wiki/<wing>/<hall>/.
//
// Setting Hall without Wing is rejected with an error.
type QueryOptions struct {
	// Query holds the search terms, separated by whitespace. Matching is
	// case-insensitive.
	Query string
	// Limit caps the number of results returned; a value <= 0 selects the
	// default of 5.
	Limit int
	// Wing and Hall optionally narrow the search scope as described above.
	Wing, Hall string
}
// Query searches the markdown notes of the brain rooted at brainDir.
//
// opts.Query is lower-cased and split on whitespace into terms. A note's
// score is the total number of case-insensitive substring occurrences of all
// terms in its raw content (frontmatter included); notes scoring zero are
// dropped. Only files whose name ends in ".md" are considered. The
// directories walked are selected by opts.Wing and opts.Hall (see
// QueryOptions); directories that do not exist are silently skipped.
//
// Results are ordered by score, highest first. Notes with equal scores are
// ordered by path, so both the output order and the set of notes that
// survives truncation to opts.Limit are deterministic. A non-positive
// opts.Limit defaults to 5.
//
// An empty or whitespace-only query returns (nil, nil). An error is returned
// when the wing/hall filter is invalid or when a note's path cannot be made
// relative to brainDir. Paths and files that cannot be read are logged with
// slog.Warn and skipped rather than failing the whole search.
func Query(brainDir string, opts QueryOptions) ([]Result, error) {
	if opts.Limit <= 0 {
		opts.Limit = 5
	}
	terms := strings.Fields(strings.ToLower(opts.Query))
	if len(terms) == 0 {
		return nil, nil
	}
	roots, err := resolveRoots(brainDir, opts.Wing, opts.Hall)
	if err != nil {
		return nil, err
	}

	var results []Result
	for _, dir := range roots {
		if _, statErr := os.Stat(dir); os.IsNotExist(statErr) {
			continue
		}
		walkErr := filepath.WalkDir(dir, func(path string, d os.DirEntry, err error) error {
			if err != nil {
				// Best effort: one unreadable entry must not abort the search.
				slog.Warn("search: skipping path", "path", path, "err", err)
				return nil
			}
			if d.IsDir() || !strings.HasSuffix(path, ".md") {
				return nil
			}
			raw, err := os.ReadFile(path)
			if err != nil {
				slog.Warn("search: skipping unreadable file", "path", path, "err", err)
				return nil
			}

			// Convert to a string once; scoring and all metadata helpers
			// share this single copy.
			text := string(raw)
			lower := strings.ToLower(text)
			score := 0
			for _, term := range terms {
				score += strings.Count(lower, term)
			}
			if score == 0 {
				return nil
			}

			rel, err := filepath.Rel(brainDir, path)
			if err != nil {
				return fmt.Errorf("rel path: %w", err)
			}
			rel = filepath.ToSlash(rel)
			wing, hall := extractWingHall(text, rel)
			results = append(results, Result{
				Path:    rel,
				Title:   extractTitle(text, d.Name()),
				Excerpt: excerpt(text, 300),
				Score:   score,
				Wing:    wing,
				Hall:    hall,
			})
			return nil
		})
		if walkErr != nil {
			return nil, walkErr
		}
	}

	sort.Slice(results, func(i, j int) bool {
		if results[i].Score != results[j].Score {
			return results[i].Score > results[j].Score
		}
		// Tie-break on path so equal scores have a well-defined order.
		return results[i].Path < results[j].Path
	})
	if len(results) > opts.Limit {
		results = results[:opts.Limit]
	}
	return results, nil
}
// resolveRoots maps the wing/hall filters onto the list of directories a
// search has to walk.
//
// A non-empty hall must belong to the closed hall vocabulary and must be
// accompanied by a wing. A non-empty wing is sanitised before being used as
// a path segment and is rejected if nothing remains. With no filters at all
// the legacy roots <brainDir>/knowledge and <brainDir>/wiki are returned.
func resolveRoots(brainDir, wing, hall string) ([]string, error) {
	hallSet := hall != ""
	if hallSet && !brain.IsValidHall(hall) {
		return nil, fmt.Errorf("invalid hall %q", hall)
	}

	switch {
	case wing == "" && hallSet:
		return nil, fmt.Errorf("hall filter requires wing")
	case wing == "":
		legacy := []string{
			filepath.Join(brainDir, "knowledge"),
			filepath.Join(brainDir, "wiki"),
		}
		return legacy, nil
	}

	safeWing := brain.Sanitise(wing)
	if safeWing == "" {
		return nil, fmt.Errorf("invalid wing %q", wing)
	}
	segments := []string{brainDir, "wiki", safeWing}
	if hallSet {
		segments = append(segments, hall)
	}
	return []string{filepath.Join(segments...)}, nil
}
// extractWingHall reports the wing and hall of a note.
//
// Values are read from the "wing" and "hall" keys of the note's frontmatter,
// with surrounding whitespace and quote characters stripped. The frontmatter
// must be the first non-blank content of the note: it opens with a "---"
// line and ends at the next "---" line. If any other text comes first, the
// note is treated as having no frontmatter, so a "---" horizontal rule
// further down the body is never mistaken for a frontmatter delimiter.
//
// Whatever the frontmatter does not supply is filled in from relPath when it
// has the shape wiki/<wing>/<hall>/<file>; the hall segment of the path is
// only used when brain.IsValidHall accepts it. relPath is expected to be
// slash-separated.
func extractWingHall(content, relPath string) (wing, hall string) {
	scanner := bufio.NewScanner(strings.NewReader(content))
	inFrontmatter := false
	for scanner.Scan() {
		line := scanner.Text()
		trimmed := strings.TrimSpace(line)
		if trimmed == "---" {
			if inFrontmatter {
				break // closing delimiter: frontmatter is complete
			}
			inFrontmatter = true
			continue
		}
		if !inFrontmatter {
			if trimmed != "" {
				// Body text before any delimiter: there is no frontmatter.
				break
			}
			continue
		}
		key, val, ok := strings.Cut(line, ":")
		if !ok {
			continue
		}
		v := strings.Trim(strings.TrimSpace(val), `"'`)
		switch strings.TrimSpace(key) {
		case "wing":
			wing = v
		case "hall":
			hall = v
		}
	}
	// scanner.Err() is intentionally not checked: metadata extraction is
	// best effort and falls back to the path below.
	if wing != "" && hall != "" {
		return wing, hall
	}

	parts := strings.Split(relPath, "/")
	if len(parts) >= 4 && parts[0] == "wiki" {
		if wing == "" {
			wing = parts[1]
		}
		if hall == "" && brain.IsValidHall(parts[2]) {
			hall = parts[2]
		}
	}
	return wing, hall
}
// extractTitle returns the display title of a note.
//
// The title is the value of the "title" key in the note's frontmatter, with
// surrounding whitespace and quote characters stripped. The frontmatter must
// be the first non-blank content of the note: it opens with a "---" line and
// ends at the next "---" line. If any other text comes first, the note is
// treated as having no frontmatter, so a "---" horizontal rule in the body
// cannot turn a later "title: ..." body line into the title.
//
// When there is no frontmatter, no title key, or the title value is blank,
// filename with its ".md" suffix removed is returned instead.
func extractTitle(content, filename string) string {
	scanner := bufio.NewScanner(strings.NewReader(content))
	inFrontmatter := false
	for scanner.Scan() {
		line := scanner.Text()
		trimmed := strings.TrimSpace(line)
		if trimmed == "---" {
			if inFrontmatter {
				break // closing delimiter: no title was declared
			}
			inFrontmatter = true
			continue
		}
		if !inFrontmatter {
			if trimmed != "" {
				// Body text before any delimiter: there is no frontmatter.
				break
			}
			continue
		}
		key, val, ok := strings.Cut(line, ":")
		if !ok || strings.TrimSpace(key) != "title" {
			continue
		}
		// A blank title is as good as none; keep looking, then fall back.
		if title := strings.Trim(strings.TrimSpace(val), `"'`); title != "" {
			return title
		}
	}
	// scanner.Err() is intentionally not checked: on any read problem the
	// file name is a perfectly good title.
	return strings.TrimSuffix(filename, ".md")
}
// excerpt returns a short preview of a note's body.
//
// If content starts with a frontmatter block, that block is removed and the
// remaining text is trimmed of surrounding whitespace; otherwise content is
// used unchanged. When the resulting body is longer than maxLen bytes it is
// cut to at most maxLen bytes and "…" is appended. The cut is moved back to
// the nearest rune boundary so a multi-byte UTF-8 character is never split.
func excerpt(content string, maxLen int) string {
	body := content
	if rest, ok := stripFrontmatter(content); ok {
		body = strings.TrimSpace(rest)
	}
	if len(body) <= maxLen {
		return body
	}
	// Ranging over a string yields the byte offset of each rune start, so
	// cut ends up as the largest rune boundary that is <= maxLen.
	cut := 0
	for i := range body {
		if i > maxLen {
			break
		}
		cut = i
	}
	return body[:cut] + "…"
}

// stripFrontmatter returns the text that follows a leading frontmatter block
// and true. The block must be the first non-blank content: a "---" line,
// any number of lines, then a closing "---" line. It returns ("", false)
// when content has no such block or the block is never closed, so that
// "---" sequences inside the body (tables, horizontal rules) are left alone.
func stripFrontmatter(content string) (string, bool) {
	rest := content
	opened := false
	for len(rest) > 0 {
		line, tail, _ := strings.Cut(rest, "\n")
		rest = tail
		trimmed := strings.TrimSpace(line)
		switch {
		case trimmed == "---" && opened:
			return rest, true
		case trimmed == "---":
			opened = true
		case !opened && trimmed != "":
			return "", false
		}
	}
	return "", false
}