feat(ingestion): add full-text wiki search package

Implements search.Query which walks brainDir/wiki/**/*.md, scores files
by term-frequency across query tokens, and returns results sorted by
score descending. Uses only stdlib — no external search deps.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mathias Bergqvist
2026-04-17 20:18:57 +02:00
parent 6c485489bf
commit 3c1f6edf3e
3 changed files with 167 additions and 0 deletions

View File

@@ -1,6 +1,9 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -0,0 +1,110 @@
// ingestion/internal/search/search.go
package search
import (
"bufio"
"os"
"path/filepath"
"sort"
"strings"
)
// Result is a single search hit from the brain wiki, as produced by Query.
type Result struct {
// Path is the slash-separated location of the page relative to the brain
// directory passed to Query (for example "wiki/concepts/retry-logic.md").
Path string `json:"path"`
// Title is the frontmatter title of the page, or the file name without its
// ".md" suffix when no title is declared.
Title string `json:"title"`
// Excerpt is the leading portion of the page body with the frontmatter
// stripped, ending in an ellipsis when the body was truncated.
Excerpt string `json:"excerpt"`
// Score is the total number of occurrences of the query terms in the
// lower-cased page content; higher means a better match.
Score int `json:"score"`
}
// Query searches every .md file under brainDir/wiki/ for pages containing
// any of the whitespace-separated terms in query and returns up to limit
// results, best match first.
//
// Matching is case-insensitive substring matching: a page's score is the
// total number of occurrences of all terms in its lower-cased content
// (frontmatter included). Pages with equal scores are ordered by Path, so
// the ranking, and therefore the set kept after truncating to limit, is
// reproducible.
//
// A limit <= 0 falls back to 5. A query without any terms yields (nil, nil).
// Files that cannot be read are skipped; errors reported by the directory
// walk itself abort the search and are returned.
func Query(brainDir, query string, limit int) ([]Result, error) {
	if limit <= 0 {
		limit = 5
	}
	terms := strings.Fields(strings.ToLower(query))
	if len(terms) == 0 {
		return nil, nil
	}

	var results []Result
	root := filepath.Join(brainDir, "wiki")
	err := filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() || !strings.HasSuffix(path, ".md") {
			return nil
		}
		raw, err := os.ReadFile(path)
		if err != nil {
			return nil // skip unreadable files
		}

		// Convert once; the same text feeds scoring, title and excerpt.
		text := string(raw)
		lower := strings.ToLower(text)
		score := 0
		for _, term := range terms {
			score += strings.Count(lower, term)
		}
		if score == 0 {
			return nil
		}

		rel, relErr := filepath.Rel(brainDir, path)
		if relErr != nil {
			// Not expected, since path was derived from brainDir; keep the
			// walked path rather than reporting an empty one.
			rel = path
		}
		results = append(results, Result{
			Path:    filepath.ToSlash(rel),
			Title:   extractTitle(text, d.Name()),
			Excerpt: excerpt(text, 300),
			Score:   score,
		})
		return nil
	})
	if err != nil {
		return nil, err
	}

	// sort.Slice is not stable, so break score ties on Path to get a total
	// order that does not depend on the sort implementation.
	sort.Slice(results, func(i, j int) bool {
		if results[i].Score != results[j].Score {
			return results[i].Score > results[j].Score
		}
		return results[i].Path < results[j].Path
	})
	if len(results) > limit {
		results = results[:limit]
	}
	return results, nil
}
// extractTitle returns the page title declared in the frontmatter of content,
// or filename with its ".md" suffix removed when no usable title is found.
//
// Frontmatter is only recognised at the top of the page: the first non-blank
// line must be "---" and the block ends at the next "---" line. This keeps a
// horizontal rule further down a page without frontmatter from being mistaken
// for a frontmatter delimiter. Inside the block, the first "title: value"
// line with a non-empty value wins; surrounding whitespace and single or
// double quotes are stripped from the value.
func extractTitle(content, filename string) string {
	fallback := strings.TrimSuffix(filename, ".md")

	scanner := bufio.NewScanner(strings.NewReader(content))
	inFrontmatter := false
	for scanner.Scan() {
		line := scanner.Text()
		trimmed := strings.TrimSpace(line)

		if !inFrontmatter {
			if trimmed == "" {
				continue // tolerate blank lines before the opening delimiter
			}
			if trimmed != "---" {
				return fallback // page starts with body text: no frontmatter
			}
			inFrontmatter = true
			continue
		}
		if trimmed == "---" {
			break // closing delimiter: no title was declared
		}

		key, val, ok := strings.Cut(line, ":")
		if !ok || strings.TrimSpace(key) != "title" {
			continue
		}
		if title := strings.Trim(strings.TrimSpace(val), `"'`); title != "" {
			return title
		}
	}
	// Also reached when the scanner stops early (e.g. on an over-long line).
	return fallback
}
// excerpt returns the beginning of the page body, at most maxLen bytes long,
// followed by "…" when the body had to be truncated.
//
// Leading frontmatter is removed first: if content contains two "---"
// delimiters and nothing but whitespace precedes the first one, everything
// after the second delimiter (whitespace-trimmed) is the body. Otherwise the
// whole content is used unchanged, so "---" rules or table separators inside
// a page without frontmatter do not swallow its opening text.
//
// Truncation never splits a multi-byte UTF-8 character: the cut is moved
// back to the nearest rune boundary at or before maxLen. A negative maxLen
// is treated as 0.
func excerpt(content string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}

	body := content
	parts := strings.SplitN(content, "---", 3)
	if len(parts) == 3 && strings.TrimSpace(parts[0]) == "" {
		body = strings.TrimSpace(parts[2])
	}
	if len(body) <= maxLen {
		return body
	}

	// Ranging over a string yields the byte offset of each rune start, so
	// the last offset not exceeding maxLen is a safe place to cut.
	cut := 0
	for i := range body {
		if i > maxLen {
			break
		}
		cut = i
	}
	return body[:cut] + "…"
}

View File

@@ -0,0 +1,54 @@
// ingestion/internal/search/search_test.go
package search_test
import (
"fmt"
"os"
"path/filepath"
"testing"
"github.com/mathiasbq/hyperguild/ingestion/internal/search"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestSearch_ReturnsMatchingPages checks that Query returns only the page
// whose content contains the query terms, with its relative path, frontmatter
// title, a positive score and a body excerpt filled in.
func TestSearch_ReturnsMatchingPages(t *testing.T) {
	brain := t.TempDir()
	conceptsDir := filepath.Join(brain, "wiki", "concepts")
	require.NoError(t, os.MkdirAll(conceptsDir, 0o755))

	// One page that mentions "retry" and one unrelated page.
	pages := []struct {
		name string
		body string
	}{
		{
			name: "retry-logic.md",
			body: "---\ntitle: Retry Logic\ndomain: software\n---\n\nRetry logic handles transient failures by re-attempting operations.\n",
		},
		{
			name: "database.md",
			body: "---\ntitle: Database\ndomain: software\n---\n\nA database stores structured data.\n",
		},
	}
	for _, page := range pages {
		target := filepath.Join(conceptsDir, page.name)
		require.NoError(t, os.WriteFile(target, []byte(page.body), 0o644))
	}

	hits, err := search.Query(brain, "retry transient", 5)
	require.NoError(t, err)
	require.Len(t, hits, 1)

	hit := hits[0]
	assert.Equal(t, "wiki/concepts/retry-logic.md", hit.Path)
	assert.Equal(t, "Retry Logic", hit.Title)
	assert.Greater(t, hit.Score, 0)
	assert.Contains(t, hit.Excerpt, "Retry")
}
// TestSearch_RespectsLimit checks that Query truncates its results to the
// requested limit. Five pages match the query and the limit is 3, so exactly
// three results must come back; asserting only an upper bound would also
// accept an empty result.
func TestSearch_RespectsLimit(t *testing.T) {
	dir := t.TempDir()
	require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
	for i := 0; i < 5; i++ {
		require.NoError(t, os.WriteFile(
			filepath.Join(dir, "wiki", "concepts", fmt.Sprintf("page-%d.md", i)),
			[]byte(fmt.Sprintf("---\ntitle: Page %d\n---\n\nThis page mentions retry.\n", i)),
			0o644,
		))
	}

	results, err := search.Query(dir, "retry", 3)
	require.NoError(t, err)
	assert.Len(t, results, 3)
}