feat(claudewatcher): client-name guard via RegisterRule + env

Pre-rollout guard. Source code stays clean — client identities come
from CLAUDE_INGEST_CLIENT_BLOCK env (sourced from a SOPS-encrypted k8s
secret in infra repo). Env value is a regex alternation; main wraps
it with `(?i)\b(...)\b` so word-boundary matching avoids false hits
inside longer identifiers (e.g. the "SEB" pattern doesn't trigger on "Sebastian").

DefaultRules (credential shapes) still take precedence so any leak
that's BOTH a client mention AND a credential shape logs as the
credential — strictly more dangerous, points triage at the right
thing. Tests cover precedence + case variations + word-boundary
respect + invalid-pattern rejection.

Refs: infra#73 Track E.1 pre-rollout grill (option B).

Bump-Type: minor
This commit is contained in:
Mathias
2026-05-26 07:10:05 +02:00
parent f8cf27e5de
commit a94b860c2e
3 changed files with 126 additions and 1 deletions

View File

@@ -55,3 +55,63 @@ func TestScrub_FirstMatchWins(t *testing.T) {
content := "Authorization: Bearer ghp_aBcD1234EfGh5678IjKl9012MnOp3456QrSt"
assert.Equal(t, "authorization-header", Scrub(content))
}
// TestRegisterRule_ClientNameGuard registers a case-insensitive,
// word-bounded client-name rule and verifies which inputs Scrub reports
// as "client-name" and which it leaves unflagged.
func TestRegisterRule_ClientNameGuard(t *testing.T) {
	// Registered rules are reset when the test finishes (presumably so
	// they do not leak into other tests — see ResetExtraRules).
	t.Cleanup(ResetExtraRules)

	if err := RegisterRule("client-name", `(?i)\b(SEB|Mastercard)\b`); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Hits — case variations + word-boundary respect.
	hits := []string{
		"mentioned SEB in this commit",
		"the Mastercard project deadline",
		"working on mastercard scope",
		"SEB internal review",
		// Known tradeoff: '?' and '=' are non-word characters, so \b holds
		// on both sides of "seb" and a query parameter named after a client
		// IS flagged. Pinned here so a change in this behavior is noticed
		// instead of being silently accepted either way.
		"https://example.com/search?seb=1",
	}
	for _, hit := range hits {
		assert.Equal(t, "client-name", Scrub(hit), "should match %q", hit)
	}

	// Misses — a client name embedded in a longer word must NOT match
	// thanks to \b. "Sebastian" contains "seb" but is followed by a word
	// character, so no boundary exists after the "b".
	misses := []string{
		"Sebastian wrote the docs",
		"unrelated text",
		"researcher",
	}
	for _, miss := range misses {
		assert.Empty(t, Scrub(miss), "should NOT match %q", miss)
	}
}
// TestRegisterRule_CredentialsTakePrecedence checks that when a single
// input matches both a registered client-name rule and a credential
// rule, Scrub reports the credential rule ("openai-sk").
func TestRegisterRule_CredentialsTakePrecedence(t *testing.T) {
	t.Cleanup(ResetExtraRules)

	if err := RegisterRule("client-name", `\b(SEB)\b`); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// The input below carries a client mention and an OpenAI-style key.
	// The credential is expected to win so that log triage points at the
	// strictly more dangerous leak.
	const input = "SEB project uses OPENAI_API_KEY=sk-proj-AAAABBBBCCCCDDDDEEEEFFFFGGGGHHHHIIII"
	got := Scrub(input)
	assert.Equal(t, "openai-sk", got)
}
// TestRegisterRule_RejectsInvalidPattern checks that RegisterRule returns
// an error for a pattern that is not a valid regular expression (here an
// unterminated character class).
func TestRegisterRule_RejectsInvalidPattern(t *testing.T) {
	t.Cleanup(ResetExtraRules)

	assert.Error(t, RegisterRule("bad", "[unclosed"))
}