feat(ingestion): add content chunking and LLM JSON output parser
This commit is contained in:
36
ingestion/internal/pipeline/chunk_test.go
Normal file
36
ingestion/internal/pipeline/chunk_test.go
Normal file
@@ -0,0 +1,36 @@
|
||||
// ingestion/internal/pipeline/chunk_test.go
|
||||
package pipeline
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestChunk_NoChunkingWhenZero(t *testing.T) {
|
||||
content := strings.Repeat("word ", 1000)
|
||||
chunks := Chunk(content, 0)
|
||||
assert.Len(t, chunks, 1)
|
||||
}
|
||||
|
||||
func TestChunk_SplitsAtParagraph(t *testing.T) {
|
||||
content := "First paragraph here.\n\nSecond paragraph here."
|
||||
chunks := Chunk(content, 40)
|
||||
assert.Len(t, chunks, 2)
|
||||
assert.Equal(t, "First paragraph here.", chunks[0])
|
||||
assert.Equal(t, "Second paragraph here.", chunks[1])
|
||||
}
|
||||
|
||||
func TestChunk_SingleLargeParagraph(t *testing.T) {
|
||||
content := strings.Repeat("x", 100)
|
||||
chunks := Chunk(content, 50)
|
||||
assert.Len(t, chunks, 1)
|
||||
}
|
||||
|
||||
func TestChunk_NoChunkingWhenContentFits(t *testing.T) {
|
||||
content := "Short content."
|
||||
chunks := Chunk(content, 1000)
|
||||
assert.Len(t, chunks, 1)
|
||||
assert.Equal(t, "Short content.", chunks[0])
|
||||
}
|
||||
Reference in New Issue
Block a user