feat(ingestion): add full-text wiki search package
Implements search.Query, which walks brainDir/wiki/**/*.md, scores files by term frequency across query tokens, and returns results sorted by score descending. Uses only the standard library — no external search dependencies.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,9 @@
|
|||||||
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
|||||||
110
ingestion/internal/search/search.go
Normal file
110
ingestion/internal/search/search.go
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
// ingestion/internal/search/search.go
|
||||||
|
package search
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Result describes one wiki page matched by Query.
type Result struct {
	// Path is the page location relative to the brain directory,
	// always written with forward slashes.
	Path string `json:"path"`

	// Title is the frontmatter title of the page, or the file name
	// without its ".md" suffix when no title is declared.
	Title string `json:"title"`

	// Excerpt is the leading portion of the page body.
	Excerpt string `json:"excerpt"`

	// Score is the total number of query-term occurrences found in the page.
	Score int `json:"score"`
}
|
||||||
|
|
||||||
|
// Query searches all .md files under brainDir/wiki/ for pages containing
|
||||||
|
// any of the whitespace-separated terms in query. Returns up to limit results
|
||||||
|
// sorted by score descending.
|
||||||
|
func Query(brainDir, query string, limit int) ([]Result, error) {
|
||||||
|
if limit <= 0 {
|
||||||
|
limit = 5
|
||||||
|
}
|
||||||
|
terms := strings.Fields(strings.ToLower(query))
|
||||||
|
if len(terms) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var results []Result
|
||||||
|
|
||||||
|
err := filepath.WalkDir(filepath.Join(brainDir, "wiki"), func(path string, d os.DirEntry, err error) error {
|
||||||
|
if err != nil || d.IsDir() || !strings.HasSuffix(path, ".md") {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
content, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil // skip unreadable files
|
||||||
|
}
|
||||||
|
|
||||||
|
lower := strings.ToLower(string(content))
|
||||||
|
score := 0
|
||||||
|
for _, term := range terms {
|
||||||
|
score += strings.Count(lower, term)
|
||||||
|
}
|
||||||
|
if score == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
rel, _ := filepath.Rel(brainDir, path)
|
||||||
|
rel = filepath.ToSlash(rel)
|
||||||
|
|
||||||
|
results = append(results, Result{
|
||||||
|
Path: rel,
|
||||||
|
Title: extractTitle(string(content), d.Name()),
|
||||||
|
Excerpt: excerpt(string(content), 300),
|
||||||
|
Score: score,
|
||||||
|
})
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Slice(results, func(i, j int) bool {
|
||||||
|
return results[i].Score > results[j].Score
|
||||||
|
})
|
||||||
|
if len(results) > limit {
|
||||||
|
results = results[:limit]
|
||||||
|
}
|
||||||
|
return results, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractTitle returns the page title declared in the YAML-style frontmatter
// of content, or filename with its ".md" suffix removed when no usable title
// is found.
//
// Frontmatter is recognised only when the first non-blank line of content is
// a "---" delimiter; it ends at the next "---" line. This keeps Markdown
// horizontal rules later in the body from being mistaken for metadata.
// Within the frontmatter, the first "title:" entry with a non-empty value
// wins; surrounding whitespace and single or double quotes are stripped.
func extractTitle(content, filename string) string {
	scanner := bufio.NewScanner(strings.NewReader(content))
	inFrontmatter := false
	// A scanner error (e.g. an over-long line) simply ends the loop and
	// falls through to the filename fallback.
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())

		if !inFrontmatter {
			if line == "" {
				continue // tolerate blank lines before the opening delimiter
			}
			if line != "---" {
				break // body starts immediately: there is no frontmatter
			}
			inFrontmatter = true
			continue
		}

		if line == "---" {
			break // closing delimiter: nothing after this is metadata
		}

		key, val, ok := strings.Cut(line, ":")
		if !ok || strings.TrimSpace(key) != "title" {
			continue
		}
		if title := strings.Trim(strings.TrimSpace(val), `"'`); title != "" {
			return title
		}
	}
	return strings.TrimSuffix(filename, ".md")
}
|
||||||
|
|
||||||
|
// excerpt returns the beginning of the page body, limited to maxLen
// characters (runes). When the body is longer, it is cut at a rune boundary
// and "…" is appended. A negative maxLen is treated as 0.
//
// If content starts with a frontmatter block (a "---" line as the first
// non-blank line, closed by a later "---" line), the block is skipped and the
// remaining body is whitespace-trimmed. Content without such a block is used
// as is, so "---" horizontal rules inside an ordinary page are preserved.
func excerpt(content string, maxLen int) string {
	body := content
	if rest, ok := afterFrontmatter(content); ok {
		body = strings.TrimSpace(rest)
	}
	if maxLen < 0 {
		maxLen = 0
	}

	// Ranging over a string yields the byte offset of each rune, so cutting
	// at i never splits a multi-byte character.
	seen := 0
	for i := range body {
		if seen == maxLen {
			return body[:i] + "…"
		}
		seen++
	}
	return body
}

// afterFrontmatter returns the text following a leading frontmatter block and
// true, or "" and false when content does not begin with a terminated block.
func afterFrontmatter(content string) (string, bool) {
	rest := strings.TrimLeft(content, " \t\r\n")
	opening, rest, found := strings.Cut(rest, "\n")
	if !found || strings.TrimSpace(opening) != "---" {
		return "", false
	}
	for rest != "" {
		var line string
		line, rest, _ = strings.Cut(rest, "\n")
		if strings.TrimSpace(line) == "---" {
			return rest, true
		}
	}
	return "", false // opening delimiter was never closed
}
|
||||||
54
ingestion/internal/search/search_test.go
Normal file
54
ingestion/internal/search/search_test.go
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
// ingestion/internal/search/search_test.go
|
||||||
|
package search_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/search"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSearch_ReturnsMatchingPages(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
||||||
|
|
||||||
|
// Write a concept page mentioning "retry"
|
||||||
|
require.NoError(t, os.WriteFile(
|
||||||
|
filepath.Join(dir, "wiki", "concepts", "retry-logic.md"),
|
||||||
|
[]byte("---\ntitle: Retry Logic\ndomain: software\n---\n\nRetry logic handles transient failures by re-attempting operations.\n"),
|
||||||
|
0o644,
|
||||||
|
))
|
||||||
|
// Write an unrelated page
|
||||||
|
require.NoError(t, os.WriteFile(
|
||||||
|
filepath.Join(dir, "wiki", "concepts", "database.md"),
|
||||||
|
[]byte("---\ntitle: Database\ndomain: software\n---\n\nA database stores structured data.\n"),
|
||||||
|
0o644,
|
||||||
|
))
|
||||||
|
|
||||||
|
results, err := search.Query(dir, "retry transient", 5)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, results, 1)
|
||||||
|
assert.Equal(t, "wiki/concepts/retry-logic.md", results[0].Path)
|
||||||
|
assert.Equal(t, "Retry Logic", results[0].Title)
|
||||||
|
assert.Greater(t, results[0].Score, 0)
|
||||||
|
assert.Contains(t, results[0].Excerpt, "Retry")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSearch_RespectsLimit(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
||||||
|
for i := 0; i < 5; i++ {
|
||||||
|
require.NoError(t, os.WriteFile(
|
||||||
|
filepath.Join(dir, "wiki", "concepts", fmt.Sprintf("page-%d.md", i)),
|
||||||
|
[]byte(fmt.Sprintf("---\ntitle: Page %d\n---\n\nThis page mentions retry.\n", i)),
|
||||||
|
0o644,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
results, err := search.Query(dir, "retry", 3)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.LessOrEqual(t, len(results), 3)
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user