// Change note accompanying this file: allows callers to provide pre-structured
// RawPage data directly, bypassing the LLM extraction step. The pipeline still
// handles slug computation, frontmatter, link canonicalization, source
// back-references, and dedup — only the extraction is skipped. Useful when a
// more capable model or manual curation produces the structured data.
//
// Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

// ingestion/internal/api/handler_test.go
package api_test
import (
	"bytes"
	"context"
	"encoding/json"
	"log/slog"
	"net/http"
	"net/http/httptest"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/mathiasbq/hyperguild/ingestion/internal/api"
	"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
)
// stubComplete returns a fixed JSON RawPage so tests never call a real LLM.
// The context and both string arguments are ignored; the result is always the
// same one-element JSON array and a nil error.
func stubComplete(_ context.Context, _, _ string) (string, error) {
	return `[{"title":"Test Source","type":"source","subtype":"article","content":"## Summary\n\nSome content here.\n"}]`, nil
}
func stubPipelineCfg() pipeline.Config {
|
|
return pipeline.Config{
|
|
Complete: stubComplete,
|
|
ChunkSize: 0,
|
|
Schema: "# Test Schema\nwiki/sources/, wiki/concepts/, wiki/entities/",
|
|
}
|
|
}
func setup(t *testing.T) (string, *api.Handler) {
|
|
t.Helper()
|
|
dir := t.TempDir()
|
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "knowledge"), 0o755))
|
|
require.NoError(t, os.WriteFile(
|
|
filepath.Join(dir, "knowledge", "tdd.md"),
|
|
[]byte("---\ntitle: TDD\ndomain: software\n---\n\nTest-driven development is a discipline.\n"),
|
|
0o644,
|
|
))
|
|
logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
|
|
return dir, api.NewHandler(dir, logger, stubPipelineCfg())
|
|
}

// ---------------------------------------------------------------------------
// Existing tests (Write / Query)
// ---------------------------------------------------------------------------

func TestQuery_ReturnsResults(t *testing.T) {
|
|
_, h := setup(t)
|
|
body, _ := json.Marshal(map[string]any{"query": "test driven", "limit": 5})
|
|
req := httptest.NewRequest(http.MethodPost, "/query", bytes.NewReader(body))
|
|
rec := httptest.NewRecorder()
|
|
|
|
h.Query(rec, req)
|
|
|
|
assert.Equal(t, http.StatusOK, rec.Code)
|
|
var resp map[string]any
|
|
require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp))
|
|
results := resp["results"].([]any)
|
|
assert.NotEmpty(t, results)
|
|
}
func TestWrite_CreatesKnowledgeFile(t *testing.T) {
|
|
dir, h := setup(t)
|
|
body, _ := json.Marshal(map[string]any{
|
|
"content": "# Test note\n\nSome content.",
|
|
"filename": "test-note.md",
|
|
})
|
|
req := httptest.NewRequest(http.MethodPost, "/write", bytes.NewReader(body))
|
|
rec := httptest.NewRecorder()
|
|
|
|
h.Write(rec, req)
|
|
|
|
assert.Equal(t, http.StatusOK, rec.Code)
|
|
var resp map[string]string
|
|
require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp))
|
|
assert.NotEmpty(t, resp["path"])
|
|
|
|
content, err := os.ReadFile(filepath.Join(dir, "knowledge", "test-note.md"))
|
|
require.NoError(t, err)
|
|
assert.Contains(t, string(content), "Some content.")
|
|
}
func TestQuery_RequiresQuery(t *testing.T) {
|
|
_, h := setup(t)
|
|
body, _ := json.Marshal(map[string]any{"limit": 5})
|
|
req := httptest.NewRequest(http.MethodPost, "/query", bytes.NewReader(body))
|
|
rec := httptest.NewRecorder()
|
|
|
|
h.Query(rec, req)
|
|
|
|
assert.Equal(t, http.StatusBadRequest, rec.Code)
|
|
}
func TestWrite_IncludesFrontmatterWhenTypeProvided(t *testing.T) {
|
|
dir, h := setup(t)
|
|
body, _ := json.Marshal(map[string]any{
|
|
"content": "Some learning.",
|
|
"filename": "typed-note.md",
|
|
"type": "concept",
|
|
"domain": "software",
|
|
})
|
|
req := httptest.NewRequest(http.MethodPost, "/write", bytes.NewReader(body))
|
|
rec := httptest.NewRecorder()
|
|
|
|
h.Write(rec, req)
|
|
|
|
assert.Equal(t, http.StatusOK, rec.Code)
|
|
content, err := os.ReadFile(filepath.Join(dir, "knowledge", "typed-note.md"))
|
|
require.NoError(t, err)
|
|
assert.Contains(t, string(content), "type: concept")
|
|
assert.Contains(t, string(content), "domain: software")
|
|
assert.Contains(t, string(content), "Some learning.")
|
|
}
func TestWrite_GeneratesFilenameIfAbsent(t *testing.T) {
|
|
dir, h := setup(t)
|
|
body, _ := json.Marshal(map[string]any{"content": "auto name"})
|
|
req := httptest.NewRequest(http.MethodPost, "/write", bytes.NewReader(body))
|
|
rec := httptest.NewRecorder()
|
|
|
|
h.Write(rec, req)
|
|
|
|
assert.Equal(t, http.StatusOK, rec.Code)
|
|
entries, _ := os.ReadDir(filepath.Join(dir, "knowledge"))
|
|
// +1 because setup already wrote tdd.md
|
|
assert.Len(t, entries, 2)
|
|
assert.True(t, strings.HasSuffix(entries[1].Name(), ".md"))
|
|
}

// ---------------------------------------------------------------------------
// POST /ingest
// ---------------------------------------------------------------------------

func TestIngest_Validation(t *testing.T) {
|
|
cases := []struct {
|
|
name string
|
|
body map[string]any
|
|
}{
|
|
{"missing content", map[string]any{"source": "test-source"}},
|
|
{"missing source", map[string]any{"content": "some content"}},
|
|
{"whitespace content", map[string]any{"content": " ", "source": "test-source"}},
|
|
{"whitespace source", map[string]any{"content": "some content", "source": " "}},
|
|
}
|
|
for _, tc := range cases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
_, h := setup(t)
|
|
body, _ := json.Marshal(tc.body)
|
|
req := httptest.NewRequest(http.MethodPost, "/ingest", bytes.NewReader(body))
|
|
rec := httptest.NewRecorder()
|
|
|
|
h.Ingest(rec, req)
|
|
|
|
assert.Equal(t, http.StatusBadRequest, rec.Code)
|
|
})
|
|
}
|
|
}
func TestIngest_Success(t *testing.T) {
|
|
_, h := setup(t)
|
|
body, _ := json.Marshal(map[string]any{
|
|
"content": "some content about shape-up methodology",
|
|
"source": "shape-up-book",
|
|
"dry_run": true,
|
|
})
|
|
req := httptest.NewRequest(http.MethodPost, "/ingest", bytes.NewReader(body))
|
|
rec := httptest.NewRecorder()
|
|
|
|
h.Ingest(rec, req)
|
|
|
|
require.Equal(t, http.StatusOK, rec.Code)
|
|
var resp map[string]any
|
|
require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp))
|
|
pages, ok := resp["pages"]
|
|
require.True(t, ok, "response must have pages field")
|
|
pagesSlice, ok := pages.([]any)
|
|
require.True(t, ok, "pages must be an array")
|
|
assert.NotEmpty(t, pagesSlice)
|
|
}

// ---------------------------------------------------------------------------
// POST /ingest-path
// ---------------------------------------------------------------------------

func TestIngestPath_MissingPath(t *testing.T) {
|
|
_, h := setup(t)
|
|
body, _ := json.Marshal(map[string]any{"source": "test-source"})
|
|
req := httptest.NewRequest(http.MethodPost, "/ingest-path", bytes.NewReader(body))
|
|
rec := httptest.NewRecorder()
|
|
|
|
h.IngestPath(rec, req)
|
|
|
|
assert.Equal(t, http.StatusBadRequest, rec.Code)
|
|
}
func TestIngestPath_File(t *testing.T) {
|
|
_, h := setup(t)
|
|
|
|
// Create a temp file with content
|
|
dir := t.TempDir()
|
|
f := filepath.Join(dir, "doc.md")
|
|
require.NoError(t, os.WriteFile(f, []byte("# Hello\nThis is markdown content."), 0o644))
|
|
|
|
body, _ := json.Marshal(map[string]any{
|
|
"path": f,
|
|
"source": "test-doc",
|
|
"dry_run": true,
|
|
})
|
|
req := httptest.NewRequest(http.MethodPost, "/ingest-path", bytes.NewReader(body))
|
|
rec := httptest.NewRecorder()
|
|
|
|
h.IngestPath(rec, req)
|
|
|
|
require.Equal(t, http.StatusOK, rec.Code)
|
|
var resp map[string]any
|
|
require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp))
|
|
pages, ok := resp["pages"]
|
|
require.True(t, ok, "response must have pages field")
|
|
pagesSlice, ok := pages.([]any)
|
|
require.True(t, ok, "pages must be an array")
|
|
assert.NotEmpty(t, pagesSlice)
|
|
}

// ---------------------------------------------------------------------------
// POST /ingest-raw
// ---------------------------------------------------------------------------

func TestIngestRaw_Validation(t *testing.T) {
|
|
cases := []struct {
|
|
name string
|
|
body map[string]any
|
|
}{
|
|
{"missing source", map[string]any{"pages": []any{map[string]any{"title": "X", "type": "concept", "content": "x"}}}},
|
|
{"missing pages", map[string]any{"source": "test-source"}},
|
|
{"empty pages", map[string]any{"source": "test-source", "pages": []any{}}},
|
|
}
|
|
for _, tc := range cases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
_, h := setup(t)
|
|
body, _ := json.Marshal(tc.body)
|
|
req := httptest.NewRequest(http.MethodPost, "/ingest-raw", bytes.NewReader(body))
|
|
rec := httptest.NewRecorder()
|
|
|
|
h.IngestRaw(rec, req)
|
|
|
|
assert.Equal(t, http.StatusBadRequest, rec.Code)
|
|
})
|
|
}
|
|
}
func TestIngestRaw_Success(t *testing.T) {
|
|
dir, h := setup(t)
|
|
body, _ := json.Marshal(map[string]any{
|
|
"source": "test-article",
|
|
"pages": []any{
|
|
map[string]any{"title": "Test Article", "type": "source", "subtype": "article", "domain": "Testing", "content": "## Summary\n\nThis is a test article about [[Test Concept]].\n"},
|
|
map[string]any{"title": "Test Concept", "type": "concept", "domain": "Testing", "content": "A concept for testing.\n"},
|
|
},
|
|
})
|
|
req := httptest.NewRequest(http.MethodPost, "/ingest-raw", bytes.NewReader(body))
|
|
rec := httptest.NewRecorder()
|
|
|
|
h.IngestRaw(rec, req)
|
|
|
|
require.Equal(t, http.StatusOK, rec.Code)
|
|
var resp map[string]any
|
|
require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp))
|
|
pages := resp["pages"].([]any)
|
|
assert.Len(t, pages, 2)
|
|
|
|
// Verify files were written
|
|
sourcePath := filepath.Join(dir, "wiki", "sources", "test-article.md")
|
|
assert.FileExists(t, sourcePath)
|
|
conceptPath := filepath.Join(dir, "wiki", "concepts", "test-concept.md")
|
|
assert.FileExists(t, conceptPath)
|
|
}
func TestIngestRaw_DryRun(t *testing.T) {
|
|
dir, h := setup(t)
|
|
body, _ := json.Marshal(map[string]any{
|
|
"source": "dry-run-test",
|
|
"pages": []any{
|
|
map[string]any{"title": "Dry Run Source", "type": "source", "subtype": "article", "content": "Content."},
|
|
},
|
|
"dry_run": true,
|
|
})
|
|
req := httptest.NewRequest(http.MethodPost, "/ingest-raw", bytes.NewReader(body))
|
|
rec := httptest.NewRecorder()
|
|
|
|
h.IngestRaw(rec, req)
|
|
|
|
require.Equal(t, http.StatusOK, rec.Code)
|
|
var resp map[string]any
|
|
require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp))
|
|
pages := resp["pages"].([]any)
|
|
assert.NotEmpty(t, pages)
|
|
|
|
// Verify no files were written
|
|
sourcePath := filepath.Join(dir, "wiki", "sources", "dry-run-test.md")
|
|
assert.NoFileExists(t, sourcePath)
|
|
}
func TestIngestPath_Directory(t *testing.T) {
|
|
_, h := setup(t)
|
|
|
|
// Create a temp dir with one .md file
|
|
dir := t.TempDir()
|
|
require.NoError(t, os.WriteFile(filepath.Join(dir, "notes.md"), []byte("# Notes\nSome notes."), 0o644))
|
|
|
|
body, _ := json.Marshal(map[string]any{
|
|
"path": dir,
|
|
"dry_run": true,
|
|
})
|
|
req := httptest.NewRequest(http.MethodPost, "/ingest-path", bytes.NewReader(body))
|
|
rec := httptest.NewRecorder()
|
|
|
|
h.IngestPath(rec, req)
|
|
|
|
require.Equal(t, http.StatusOK, rec.Code)
|
|
var resp map[string]any
|
|
require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp))
|
|
pages, ok := resp["pages"]
|
|
require.True(t, ok, "response must have pages field")
|
|
pagesSlice, ok := pages.([]any)
|
|
require.True(t, ok, "pages must be an array")
|
|
assert.NotEmpty(t, pagesSlice)
|
|
}