fix(pr_files_diff): copy per-file diff bytes to break buffer aliasing

splitUnifiedDiff used bytes.Buffer to accumulate each file's diff,
then stored buf.Bytes() into the result map and called buf.Reset()
to start the next file. bytes.Buffer.Bytes() returns the buffer's
internal backing slice; Reset() resets length to 0 but reuses the
same backing array. As a result, every map entry aliased the same
storage, so all files ended up showing the LAST file's diff content.

Fix: copy the bytes into a fresh slice before storing in the map.

Adds TestPRFilesDiffPerFileIsolation as a regression test that
asserts each file entry contains its OWN diff --git header and
none of the other files' headers. Verified failing on the prior
code, passing after the fix.

Closes #25
This commit is contained in:
Mathias
2026-05-16 23:01:18 +02:00
parent f26f922c96
commit 9013c8ff9c
2 changed files with 48 additions and 1 deletions

View File

@@ -143,7 +143,13 @@ func splitUnifiedDiff(d []byte) map[string][]byte {
flush := func() {
if currentFile != "" {
m[currentFile] = current.Bytes()
// Copy: bytes.Buffer.Bytes() returns the internal slice,
// which Reset() then reuses. Without the copy, every map
// entry ends up aliased to the last file's data.
b := current.Bytes()
cp := make([]byte, len(b))
copy(cp, b)
m[currentFile] = cp
current.Reset()
}
}

View File

@@ -97,6 +97,47 @@ func TestPRFilesDiffSmall(t *testing.T) {
assert.ElementsMatch(t, fileNames, paths)
}
// Regression for issue #25: every file's diff entry must contain its OWN diff,
// not a shared buffer pointing at the last file. Prior bug: splitUnifiedDiff
// flushed bytes.Buffer.Bytes() into the map without copying, so every entry
// aliased the buffer's backing array and showed the last file's content.
func TestPRFilesDiffPerFileIsolation(t *testing.T) {
fileNames := []string{"alpha.go", "beta.go", "gamma.go", "delta.go"}
rawDiff := buildDiff(fileNames, 5)
filesJSON := buildFilesJSON(fileNames, 5)
srv := newPRFilesDiffServer(t, filesJSON, rawDiff)
defer srv.Close()
tool := tools.NewPRFilesDiff(gitea.NewClient(srv.URL, "tok"), allowlist.New([]string{"o"}))
result, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":1}`))
require.NoError(t, err)
var out struct {
Files []struct {
Path string `json:"path"`
Diff string `json:"diff"`
} `json:"files"`
}
require.NoError(t, json.Unmarshal(result, &out))
require.Len(t, out.Files, len(fileNames))
for _, f := range out.Files {
expected := fmt.Sprintf("diff --git a/%s b/%s", f.Path, f.Path)
assert.Contains(t, f.Diff, expected,
"file %s diff must contain its own header, got: %.80q", f.Path, f.Diff)
// No other file's header should leak in.
for _, other := range fileNames {
if other == f.Path {
continue
}
otherHeader := fmt.Sprintf("diff --git a/%s b/%s", other, other)
assert.NotContains(t, f.Diff, otherHeader,
"file %s diff must NOT contain %s's header", f.Path, other)
}
}
}
func TestPRFilesDiffPerFileTruncated(t *testing.T) {
// One file with a 30KB diff (each "+abcdefghij\n" = 12 bytes; 30KB / 12 ≈ 2560 lines).
fileNames := []string{"bigfile.go"}