Files
gitea-mcp/internal/tools/pr_files_diff.go
Mathias 9013c8ff9c fix(pr_files_diff): copy per-file diff bytes to break buffer aliasing
splitUnifiedDiff used bytes.Buffer to accumulate each file's diff,
then stored buf.Bytes() into the result map and called buf.Reset()
to start the next file. bytes.Buffer.Bytes() returns the buffer's
internal backing slice; Reset() resets length to 0 but reuses the
same backing array. As a result, every map entry aliased the same
storage, so all files ended up showing the LAST file's diff content.

Fix: copy the bytes into a fresh slice before storing in the map.

Adds TestPRFilesDiffPerFileIsolation as a regression test that
asserts each file entry contains its OWN diff --git header and
none of the other files' headers. Verified failing on the prior
code, passing after the fix.

Closes #25
2026-05-16 23:24:16 +02:00

178 lines
4.5 KiB
Go

package tools
import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
"strings"
"gitea.d-ma.be/mathias/gitea-mcp/internal/allowlist"
"gitea.d-ma.be/mathias/gitea-mcp/internal/gitea"
"gitea.d-ma.be/mathias/gitea-mcp/internal/registry"
)
const (
maxFileDiffBytes = 20 * 1024
maxResponseBytes = 200 * 1024
)
type PRFilesDiff struct {
c *gitea.Client
a *allowlist.Allowlist
}
func NewPRFilesDiff(c *gitea.Client, a *allowlist.Allowlist) *PRFilesDiff {
return &PRFilesDiff{c: c, a: a}
}
func (t *PRFilesDiff) Descriptor() registry.ToolDescriptor {
return registry.ToolDescriptor{
Name: "pr_files_diff",
Description: "Get a pull request's per-file diff with size caps (20KB/file, 200KB total).",
InputSchema: json.RawMessage(`{
"type":"object",
"properties":{
"owner":{"type":"string"},
"name":{"type":"string"},
"number":{"type":"integer","minimum":1}
},
"required":["owner","name","number"]
}`),
}
}
type prFilesDiffArgs struct {
Owner string `json:"owner"`
Name string `json:"name"`
Number int `json:"number"`
}
type prFileDiffEntry struct {
Path string `json:"path"`
Diff string `json:"diff"`
Truncated bool `json:"truncated"`
OmittedLines int `json:"omitted_lines,omitempty"`
Additions int `json:"additions"`
Deletions int `json:"deletions"`
}
func (t *PRFilesDiff) Call(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) {
var args prFilesDiffArgs
if err := parseArgs(raw, &args); err != nil {
return nil, err
}
if err := t.a.Check(args.Owner); err != nil {
return nil, err
}
if args.Number < 1 {
return nil, fmt.Errorf("number must be >= 1: %w", gitea.ErrValidation)
}
files, err := t.c.GetPullRequestFiles(ctx, args.Owner, args.Name, args.Number)
if err != nil {
return nil, err
}
rawDiff, err := t.c.GetPullRequestDiff(ctx, args.Owner, args.Name, args.Number)
if err != nil {
return nil, err
}
// Split unified diff by per-file headers ("diff --git a/path b/path")
perFile := splitUnifiedDiff(rawDiff)
out := struct {
Files []prFileDiffEntry `json:"files"`
OmittedFiles []string `json:"omitted_files,omitempty"`
ResponseTruncated bool `json:"response_truncated"`
}{
Files: make([]prFileDiffEntry, 0, len(files)),
}
totalBytes := 0
for _, f := range files {
// look up the diff for this file (best-effort by path match)
diffBytes, ok := perFile[f.Filename]
if !ok {
diffBytes = []byte{}
}
entry := prFileDiffEntry{
Path: f.Filename,
Additions: f.Additions,
Deletions: f.Deletions,
}
// Per-file cap
if len(diffBytes) > maxFileDiffBytes {
truncated := diffBytes[:maxFileDiffBytes]
omittedLines := bytes.Count(diffBytes[maxFileDiffBytes:], []byte("\n"))
entry.Diff = string(truncated)
entry.Truncated = true
entry.OmittedLines = omittedLines
} else {
entry.Diff = string(diffBytes)
}
// Response cap — if adding this entry would exceed, push to omitted_files
entryEstimate := len(entry.Diff) + 200 // small overhead for path + counts
if totalBytes+entryEstimate > maxResponseBytes {
out.OmittedFiles = append(out.OmittedFiles, f.Filename)
out.ResponseTruncated = true
continue
}
totalBytes += entryEstimate
out.Files = append(out.Files, entry)
}
return textOK(out)
}
// splitUnifiedDiff parses a unified diff and returns a map from filename to that file's
// portion of the diff. The unified diff format starts each file with a line like
// "diff --git a/<path> b/<path>".
func splitUnifiedDiff(d []byte) map[string][]byte {
m := map[string][]byte{}
scanner := bufio.NewScanner(bytes.NewReader(d))
scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024) // allow long diffs
var currentFile string
var current bytes.Buffer
flush := func() {
if currentFile != "" {
// Copy: bytes.Buffer.Bytes() returns the internal slice,
// which Reset() then reuses. Without the copy, every map
// entry ends up aliased to the last file's data.
b := current.Bytes()
cp := make([]byte, len(b))
copy(cp, b)
m[currentFile] = cp
current.Reset()
}
}
for scanner.Scan() {
line := scanner.Text()
if strings.HasPrefix(line, "diff --git ") {
flush()
// Parse: "diff --git a/<path> b/<path>"
rest := strings.TrimPrefix(line, "diff --git a/")
parts := strings.SplitN(rest, " b/", 2)
if len(parts) == 2 {
currentFile = parts[0]
} else {
currentFile = ""
}
}
if currentFile != "" {
current.WriteString(line)
current.WriteByte('\n')
}
}
flush()
return m
}