feat(claudewatcher): ingest Claude Code session transcripts into brain
New package internal/claudewatcher.

The volume gate (24 turns/week of agentsquad logs vs 500/week gate) exposed that the real signal lives in daily Claude Code usage at ~/.claude/projects/*/<uuid>.jsonl, not in agentsquad output. This package captures that signal. See infra#73 Track E + hyperguild#27 for the full reframe.

Components:
- parser: tolerant JSONL parser over the observed Claude Code session schema (user / assistant / attachment / system + bookkeeping types). Skip-flag fast-paths queue-operation, last-prompt, permission-mode, ai-title, bridge-session, file-history-snapshot.
- scrubber: 11-rule fail-closed regex set for credential shapes (bearer, postgres URIs, PEM, ssh-key, ghp_/sk-/sk-ant-/AKIA, homelab env tokens, SOPS markers). Drop turn + log on match.
- cursor: postgres-backed claude_session_cursors table, keyed by (host, file_path) with byte_offset. Resumable across pod restarts.
- watcher: poll loop. Walks SessionsDir, processes each .jsonl from its cursor offset, runs scrubber, emits a Batch per file to a Sink interface, advances cursor on successful Ingest.

No classifier integration in this commit — every kept turn is emitted in a per-session batch. The cmd/server wiring (next commit) routes batches to brain/wiki/claude-sessions/facts/. Classifier-driven hall routing (decisions / failures / hypotheses) is a follow-up.

19 unit tests across parser + scrubber + watcher. task check green.

Refs: infra#73, hyperguild#27
This commit is contained in:
157
ingestion/internal/claudewatcher/parser_test.go
Normal file
157
ingestion/internal/claudewatcher/parser_test.go
Normal file
@@ -0,0 +1,157 @@
|
||||
package claudewatcher
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func collect(t *testing.T, body string) ([]Turn, int64, error) {
|
||||
t.Helper()
|
||||
var out []Turn
|
||||
end, err := ParseStream(strings.NewReader(body), 0, nil, func(tr Turn) error {
|
||||
out = append(out, tr)
|
||||
return nil
|
||||
})
|
||||
return out, end, err
|
||||
}
|
||||
|
||||
func TestParseStream_UserTurnStringContent(t *testing.T) {
|
||||
body := `{"type":"user","sessionId":"S","timestamp":"2026-05-25T07:00:00Z","message":"hello world"}
|
||||
`
|
||||
turns, end, err := collect(t, body)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, turns, 1)
|
||||
assert.Equal(t, "user", turns[0].Type)
|
||||
assert.Equal(t, "S", turns[0].SessionID)
|
||||
assert.Equal(t, "hello world", turns[0].Content)
|
||||
assert.False(t, turns[0].Skip)
|
||||
assert.Equal(t, int64(len(body)), end)
|
||||
}
|
||||
|
||||
func TestParseStream_UserTurnContentBlocks(t *testing.T) {
|
||||
body := `{"type":"user","sessionId":"S","timestamp":"2026-05-25T07:00:00Z","message":{"role":"user","content":[{"type":"text","text":"line 1"},{"type":"text","text":"line 2"}]}}
|
||||
`
|
||||
turns, _, err := collect(t, body)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, turns, 1)
|
||||
assert.Equal(t, "line 1\nline 2", turns[0].Content)
|
||||
}
|
||||
|
||||
func TestParseStream_AssistantToolUse(t *testing.T) {
|
||||
body := `{"type":"assistant","sessionId":"S","timestamp":"2026-05-25T07:00:00Z","message":{"content":[{"type":"text","text":"calling now"},{"type":"tool_use","name":"Edit","input":{}}]}}
|
||||
`
|
||||
turns, _, err := collect(t, body)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, turns, 1)
|
||||
assert.Equal(t, "Edit", turns[0].ToolName)
|
||||
assert.Contains(t, turns[0].Content, "calling now")
|
||||
assert.Contains(t, turns[0].Content, "[tool_use:Edit]")
|
||||
}
|
||||
|
||||
func TestParseStream_AssistantToolResult(t *testing.T) {
|
||||
body := `{"type":"user","sessionId":"S","timestamp":"2026-05-25T07:00:00Z","message":{"content":[{"type":"tool_result","content":"output of cmd"}]}}
|
||||
`
|
||||
turns, _, err := collect(t, body)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, turns, 1)
|
||||
assert.Contains(t, turns[0].Content, "[tool_result] output of cmd")
|
||||
}
|
||||
|
||||
func TestParseStream_SkipsBookkeepingTypes(t *testing.T) {
|
||||
body := strings.Join([]string{
|
||||
`{"type":"queue-operation","sessionId":"S","content":"x"}`,
|
||||
`{"type":"last-prompt","sessionId":"S","lastPrompt":"y"}`,
|
||||
`{"type":"permission-mode","sessionId":"S","permissionMode":"auto"}`,
|
||||
`{"type":"ai-title","sessionId":"S","aiTitle":"My session"}`,
|
||||
`{"type":"file-history-snapshot","messageId":"abc"}`,
|
||||
}, "\n") + "\n"
|
||||
turns, _, err := collect(t, body)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, turns, 5)
|
||||
for _, tr := range turns {
|
||||
assert.True(t, tr.Skip, "expected Skip=true for %q", tr.Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseStream_UnknownTypeIsSkip(t *testing.T) {
|
||||
body := `{"type":"future-thing","sessionId":"S"}` + "\n"
|
||||
turns, _, err := collect(t, body)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, turns, 1)
|
||||
assert.True(t, turns[0].Skip)
|
||||
}
|
||||
|
||||
func TestParseStream_MalformedLineIsSkippedNotFatal(t *testing.T) {
|
||||
body := strings.Join([]string{
|
||||
`{"type":"user","sessionId":"S","message":"first"}`,
|
||||
`{not valid json`,
|
||||
`{"type":"user","sessionId":"S","message":"third"}`,
|
||||
}, "\n") + "\n"
|
||||
var warnings int
|
||||
var turns []Turn
|
||||
_, err := ParseStream(strings.NewReader(body), 0, func(format string, args ...any) {
|
||||
warnings++
|
||||
}, func(tr Turn) error {
|
||||
turns = append(turns, tr)
|
||||
return nil
|
||||
})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, turns, 2, "first + third should make it through")
|
||||
assert.Equal(t, 1, warnings)
|
||||
}
|
||||
|
||||
func TestParseStream_EmitErrStopHaltsCleanly(t *testing.T) {
|
||||
body := strings.Join([]string{
|
||||
`{"type":"user","sessionId":"S","message":"a"}`,
|
||||
`{"type":"user","sessionId":"S","message":"b"}`,
|
||||
`{"type":"user","sessionId":"S","message":"c"}`,
|
||||
}, "\n") + "\n"
|
||||
count := 0
|
||||
end, err := ParseStream(strings.NewReader(body), 0, nil, func(tr Turn) error {
|
||||
count++
|
||||
if count == 2 {
|
||||
return ErrStop
|
||||
}
|
||||
return nil
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 2, count)
|
||||
assert.Greater(t, end, int64(0))
|
||||
}
|
||||
|
||||
func TestParseStream_EmitOtherErrorPropagates(t *testing.T) {
|
||||
body := `{"type":"user","sessionId":"S","message":"a"}` + "\n"
|
||||
want := errors.New("boom")
|
||||
_, err := ParseStream(strings.NewReader(body), 0, nil, func(tr Turn) error {
|
||||
return want
|
||||
})
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "boom")
|
||||
}
|
||||
|
||||
func TestParseStream_AttachmentHookEvent(t *testing.T) {
|
||||
body := `{"type":"attachment","sessionId":"S","timestamp":"2026-05-25T07:00:00Z","attachment":{"type":"hook_success","hookName":"SessionStart:startup","hookEvent":"SessionStart","content":"hook body"}}
|
||||
`
|
||||
turns, _, err := collect(t, body)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, turns, 1)
|
||||
assert.Equal(t, "hook body", turns[0].Content)
|
||||
}
|
||||
|
||||
func TestParseStream_OffsetAdvances(t *testing.T) {
|
||||
body := `{"type":"user","sessionId":"S","message":"a"}` + "\n" +
|
||||
`{"type":"user","sessionId":"S","message":"b"}` + "\n"
|
||||
var offsets []int64
|
||||
_, err := ParseStream(strings.NewReader(body), 100, nil, func(tr Turn) error {
|
||||
offsets = append(offsets, tr.OffsetAfter)
|
||||
return nil
|
||||
})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, offsets, 2)
|
||||
assert.Greater(t, offsets[0], int64(100))
|
||||
assert.Greater(t, offsets[1], offsets[0])
|
||||
}
|
||||
Reference in New Issue
Block a user