Pre-rollout guard. Source code stays clean — client identities come from CLAUDE_INGEST_CLIENT_BLOCK env (sourced from a SOPS-encrypted k8s secret in infra repo). Env value is a regex alternation; main wraps it with `(?i)\b(...)\b` so word-boundary matching avoids false hits inside longer identifiers (e.g. "Sebastian" doesn't trigger on "SEB"). DefaultRules (credential shapes) still take precedence so any leak that's BOTH a client mention AND a credential shape logs as the credential — strictly more dangerous, points triage at the right thing. Tests cover precedence + case variations + word-boundary respect + invalid-pattern rejection. Refs: infra#73 Track E.1 pre-rollout grill (option B). Bump-Type: minor
118 lines
4.1 KiB
Go
118 lines
4.1 KiB
Go
package claudewatcher
|
|
|
|
import (
|
|
"testing"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
)
|
|
|
|
func TestScrub_PoisonedFixtures(t *testing.T) {
|
|
// One representative bad-string per rule. If a rule fires for the
|
|
// wrong content shape later, this table localises the regression.
|
|
cases := []struct {
|
|
name string
|
|
content string
|
|
want string
|
|
}{
|
|
{"bearer-token", "curl -H 'Authorization: Bearer abcdef1234567890ghijklmnop'", "authorization-header"},
|
|
{"bearer-no-header", "header = Bearer eyJhbGciOiJIUzI1NiJ9.payload.sig", "bearer-token"},
|
|
{"postgres-uri", "DATABASE_URL=postgres://user:s3cret@10.0.1.20:5432/brain", "postgres-uri-with-password"},
|
|
{"private-key", "-----BEGIN OPENSSH PRIVATE KEY-----\nb3BlbnNzaC1rZXktdjEAAAAA", "private-key"},
|
|
{"ssh-public", "deploy: ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIK1234567890abcdefghij user@host", "ssh-key"},
|
|
{"github-pat-classic", "GH_TOKEN=ghp_aBcD1234EfGh5678IjKl9012MnOp3456QrSt", "github-pat"},
|
|
{"openai-key", "OPENAI_API_KEY=sk-proj-AAAABBBBCCCCDDDDEEEEFFFFGGGGHHHHIIII", "openai-sk"},
|
|
{"anthropic-key", "ANTHROPIC_API_KEY=sk-ant-api03-aaaaBBBBccccDDDDeeeeFFFFggggHHHHiiiiJJJJkkkk", "anthropic-sk"},
|
|
{"aws-access-key", "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE", "aws-access-key"},
|
|
{"homelab-env", "POSTGRES_PASSWORD=hunter2supersecretvalue", "homelab-env-token"},
|
|
{"sops-marker", "value: ENC[AES256_GCM,data:abc123def456,iv:zzz]", "sops-encrypted-marker"},
|
|
}
|
|
for _, tc := range cases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
got := Scrub(tc.content)
|
|
assert.Equal(t, tc.want, got)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestScrub_CleanContentPassesThrough(t *testing.T) {
|
|
cases := []string{
|
|
"",
|
|
"plain text with no credentials",
|
|
"a 40 char hex string aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa is fine in isolation",
|
|
"`Bearer` token mentioned in docs without an actual value",
|
|
"file at ~/.ssh/id_ed25519",
|
|
"the function Authorization() takes no args",
|
|
"comment: see API key in 1Password",
|
|
}
|
|
for _, c := range cases {
|
|
assert.Empty(t, Scrub(c), "expected clean for %q", c)
|
|
}
|
|
}
|
|
|
|
func TestScrub_FirstMatchWins(t *testing.T) {
|
|
// Content matching multiple rules: report the first rule order in
|
|
// DefaultRules. Stability matters for log triage.
|
|
content := "Authorization: Bearer ghp_aBcD1234EfGh5678IjKl9012MnOp3456QrSt"
|
|
assert.Equal(t, "authorization-header", Scrub(content))
|
|
}
|
|
|
|
func TestRegisterRule_ClientNameGuard(t *testing.T) {
|
|
t.Cleanup(ResetExtraRules)
|
|
require := func(err error) {
|
|
if err != nil {
|
|
t.Fatalf("unexpected err: %v", err)
|
|
}
|
|
}
|
|
require(RegisterRule("client-name", `(?i)\b(SEB|Mastercard)\b`))
|
|
|
|
// Hits — case variations + word-boundary respect.
|
|
for _, hit := range []string{
|
|
"mentioned SEB in this commit",
|
|
"the Mastercard project deadline",
|
|
"working on mastercard scope",
|
|
"SEB internal review",
|
|
} {
|
|
assert.Equal(t, "client-name", Scrub(hit), "should match %q", hit)
|
|
}
|
|
|
|
// Misses — substring within a longer word should NOT match
|
|
// thanks to \b. "Sebastian" contains "seb" but \b prevents hit.
|
|
for _, miss := range []string{
|
|
"Sebastian wrote the docs",
|
|
"unrelated text",
|
|
"researcher",
|
|
"https://example.com/search?seb=1", // 'seb' bounded by ?=, still matches \b
|
|
} {
|
|
got := Scrub(miss)
|
|
if miss == "https://example.com/search?seb=1" {
|
|
// `seb=` has word-boundary at '='; this DOES match \bseb\b.
|
|
// Accept either outcome; document the tradeoff.
|
|
assert.Contains(t, []string{"", "client-name"}, got)
|
|
continue
|
|
}
|
|
assert.Empty(t, got, "should NOT match %q", miss)
|
|
}
|
|
}
|
|
|
|
func TestRegisterRule_CredentialsTakePrecedence(t *testing.T) {
|
|
t.Cleanup(ResetExtraRules)
|
|
require := func(err error) {
|
|
if err != nil {
|
|
t.Fatalf("unexpected err: %v", err)
|
|
}
|
|
}
|
|
require(RegisterRule("client-name", `\b(SEB)\b`))
|
|
|
|
// Content matches both a credential rule AND a client rule —
|
|
// credential rule wins by ordering, so log triage points at the
|
|
// strictly more dangerous leak.
|
|
content := "SEB project uses OPENAI_API_KEY=sk-proj-AAAABBBBCCCCDDDDEEEEFFFFGGGGHHHHIIII"
|
|
assert.Equal(t, "openai-sk", Scrub(content))
|
|
}
|
|
|
|
func TestRegisterRule_RejectsInvalidPattern(t *testing.T) {
|
|
t.Cleanup(ResetExtraRules)
|
|
err := RegisterRule("bad", "[unclosed")
|
|
assert.Error(t, err)
|
|
}
|