From 9013c8ff9cd1d7b5848e8e6bd5bdd4a30a049991 Mon Sep 17 00:00:00 2001 From: Mathias Date: Sat, 16 May 2026 23:01:18 +0200 Subject: [PATCH] fix(pr_files_diff): copy per-file diff bytes to break buffer aliasing splitUnifiedDiff used bytes.Buffer to accumulate each file's diff, then stored buf.Bytes() into the result map and called buf.Reset() to start the next file. bytes.Buffer.Bytes() returns the buffer's internal backing slice; Reset() resets length to 0 but reuses the same backing array. As a result, every map entry aliased the same storage, so all files ended up showing the LAST file's diff content. Fix: copy the bytes into a fresh slice before storing in the map. Adds TestPRFilesDiffPerFileIsolation as a regression test that asserts each file entry contains its OWN diff --git header and none of the other files' headers. Verified failing on the prior code, passing after the fix. Closes #25 --- internal/tools/pr_files_diff.go | 8 +++++- internal/tools/pr_files_diff_test.go | 41 ++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/internal/tools/pr_files_diff.go b/internal/tools/pr_files_diff.go index 0400802..12f131a 100644 --- a/internal/tools/pr_files_diff.go +++ b/internal/tools/pr_files_diff.go @@ -143,7 +143,13 @@ func splitUnifiedDiff(d []byte) map[string][]byte { flush := func() { if currentFile != "" { - m[currentFile] = current.Bytes() + // Copy: bytes.Buffer.Bytes() returns the internal slice, + // which Reset() then reuses. Without the copy, every map + // entry ends up aliased to the last file's data. + b := current.Bytes() + cp := make([]byte, len(b)) + copy(cp, b) + m[currentFile] = cp current.Reset() } } diff --git a/internal/tools/pr_files_diff_test.go b/internal/tools/pr_files_diff_test.go index 02bec0f..0e1a61a 100644 --- a/internal/tools/pr_files_diff_test.go +++ b/internal/tools/pr_files_diff_test.go @@ -97,6 +97,47 @@ func TestPRFilesDiffSmall(t *testing.T) { assert.ElementsMatch(t, fileNames, paths) } +// Regression for issue #25: every file's diff entry must contain its OWN diff, +// not a shared buffer pointing at the last file. Prior bug: splitUnifiedDiff +// flushed bytes.Buffer.Bytes() into the map without copying, so every entry +// aliased the buffer's backing array and showed the last file's content. +func TestPRFilesDiffPerFileIsolation(t *testing.T) { + fileNames := []string{"alpha.go", "beta.go", "gamma.go", "delta.go"} + rawDiff := buildDiff(fileNames, 5) + filesJSON := buildFilesJSON(fileNames, 5) + + srv := newPRFilesDiffServer(t, filesJSON, rawDiff) + defer srv.Close() + + tool := tools.NewPRFilesDiff(gitea.NewClient(srv.URL, "tok"), allowlist.New([]string{"o"})) + result, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":1}`)) + require.NoError(t, err) + + var out struct { + Files []struct { + Path string `json:"path"` + Diff string `json:"diff"` + } `json:"files"` + } + require.NoError(t, json.Unmarshal(result, &out)) + require.Len(t, out.Files, len(fileNames)) + + for _, f := range out.Files { + expected := fmt.Sprintf("diff --git a/%s b/%s", f.Path, f.Path) + assert.Contains(t, f.Diff, expected, + "file %s diff must contain its own header, got: %.80q", f.Path, f.Diff) + // No other file's header should leak in. + for _, other := range fileNames { + if other == f.Path { + continue + } + otherHeader := fmt.Sprintf("diff --git a/%s b/%s", other, other) + assert.NotContains(t, f.Diff, otherHeader, + "file %s diff must NOT contain %s's header", f.Path, other) + } + } +} + func TestPRFilesDiffPerFileTruncated(t *testing.T) { // One file with a 30KB diff (each "+abcdefghij\n" = 12 bytes; 30KB / 12 ≈ 2560 lines). fileNames := []string{"bigfile.go"}