// Result is a single search hit from the brain wiki.
type Result struct {
	Path    string `json:"path"`    // slash-separated path relative to brainDir
	Title   string `json:"title"`   // frontmatter title, or the file name without ".md"
	Excerpt string `json:"excerpt"` // leading part of the page body, frontmatter removed
	Score   int    `json:"score"`   // summed occurrence count of all query terms
}

const (
	// defaultLimit is the result cap used when the caller passes limit <= 0.
	defaultLimit = 5
	// excerptLen is the maximum excerpt size, in bytes, stored in a Result.
	excerptLen = 300
)

// Query searches all .md files under brainDir/wiki/ for pages containing
// any of the whitespace-separated terms in query. Matching is a
// case-insensitive substring count over the whole file, frontmatter included.
//
// It returns up to limit results (defaultLimit when limit <= 0) sorted by
// score descending; pages with equal scores are ordered by path so the output
// is deterministic. An empty or all-whitespace query yields (nil, nil).
// Errors reported by the directory walk (for example a missing wiki
// directory) are returned; individual files that cannot be read are skipped.
func Query(brainDir, query string, limit int) ([]Result, error) {
	if limit <= 0 {
		limit = defaultLimit
	}
	terms := strings.Fields(strings.ToLower(query))
	if len(terms) == 0 {
		return nil, nil
	}

	var results []Result
	root := filepath.Join(brainDir, "wiki")
	err := filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error {
		if err != nil {
			// d may be nil here, so this check must come before d.IsDir().
			return err
		}
		if d.IsDir() || !strings.HasSuffix(path, ".md") {
			return nil
		}

		raw, err := os.ReadFile(path)
		if err != nil {
			return nil // best effort: skip unreadable files
		}
		content := string(raw) // convert once and reuse below

		score := countMatches(strings.ToLower(content), terms)
		if score == 0 {
			return nil
		}

		rel, err := filepath.Rel(brainDir, path)
		if err != nil {
			// path is derived from brainDir, so this is not expected; fall
			// back to the walked path rather than emitting an empty one.
			rel = path
		}

		results = append(results, Result{
			Path:    filepath.ToSlash(rel),
			Title:   extractTitle(content, d.Name()),
			Excerpt: excerpt(content, excerptLen),
			Score:   score,
		})
		return nil
	})
	if err != nil {
		return nil, err
	}
	return rankResults(results, limit), nil
}

// countMatches returns the total number of non-overlapping occurrences of
// every term in lower. Both lower and terms are expected to be lower-cased
// already.
func countMatches(lower string, terms []string) int {
	total := 0
	for _, term := range terms {
		total += strings.Count(lower, term)
	}
	return total
}

// rankResults sorts results in place by score descending, breaking ties by
// path ascending, and returns at most limit of them. sort.Slice is not
// stable, so without the tie-break equal-score pages could come back in any
// order and the set kept by the limit would be arbitrary.
func rankResults(results []Result, limit int) []Result {
	sort.Slice(results, func(i, j int) bool {
		if results[i].Score != results[j].Score {
			return results[i].Score > results[j].Score
		}
		return results[i].Path < results[j].Path
	})
	if limit >= 0 && len(results) > limit {
		results = results[:limit]
	}
	return results
}

// splitFrontmatter separates a leading frontmatter block from the rest of a
// page. Frontmatter is recognised only when the first line (after an optional
// byte-order mark and leading whitespace) is "---" and some later line is
// also "---". On success it returns the text between the two delimiter lines
// and everything after the closing delimiter. Otherwise ok is false and body
// is content unchanged, so a "---" horizontal rule in a page without
// frontmatter is never mistaken for a delimiter.
func splitFrontmatter(content string) (front, body string, ok bool) {
	trimmed := strings.TrimLeft(content, "\ufeff \t\r\n")
	first, rest, found := strings.Cut(trimmed, "\n")
	if !found || strings.TrimSpace(first) != "---" {
		return "", content, false
	}

	remaining := rest
	offset := 0 // byte offset of the current line within rest
	for {
		line, after, more := strings.Cut(remaining, "\n")
		if strings.TrimSpace(line) == "---" {
			return rest[:offset], after, true
		}
		if !more {
			// Opening delimiter without a closing one: not frontmatter.
			return "", content, false
		}
		offset += len(line) + 1
		remaining = after
	}
}

// extractTitle returns the value of the first non-empty "title" key in the
// page's frontmatter, with surrounding whitespace and quotes removed. When
// the page has no frontmatter or no usable title, it returns filename without
// its ".md" suffix.
func extractTitle(content, filename string) string {
	if front, _, ok := splitFrontmatter(content); ok {
		for _, line := range strings.Split(front, "\n") {
			key, val, found := strings.Cut(line, ":")
			if !found || strings.TrimSpace(key) != "title" {
				continue
			}
			if title := strings.Trim(strings.TrimSpace(val), `"'`); title != "" {
				return title
			}
		}
	}
	return strings.TrimSuffix(filename, ".md")
}

// excerpt returns the start of the page body. When the page has frontmatter,
// it is dropped and the remaining body is trimmed of surrounding whitespace;
// otherwise content is used as-is. A body longer than maxLen bytes is cut at
// the last character boundary at or before maxLen and suffixed with "…", so
// the result is never split in the middle of a multi-byte UTF-8 character.
// A negative maxLen is treated as zero.
func excerpt(content string, maxLen int) string {
	body := content
	if _, rest, ok := splitFrontmatter(content); ok {
		body = strings.TrimSpace(rest)
	}
	if maxLen < 0 {
		maxLen = 0
	}
	if len(body) <= maxLen {
		return body
	}

	// Ranging over a string yields the byte index of each character start;
	// keep the largest one that does not exceed maxLen.
	cut := 0
	for i := range body {
		if i > maxLen {
			break
		}
		cut = i
	}
	return body[:cut] + "…"
}