feat(tools): pr_files_diff with caps

Returns a per-file unified diff for a PR, capped at 20KB per file and 200KB
for the total response. Files exceeding the per-file cap are reported with
truncated=true and an omitted_lines count; files that would push the response
over 200KB are listed in omitted_files instead.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mathias Bergqvist
2026-05-04 22:57:11 +02:00
parent d3d0fed6b1
commit e95e87e8e3
5 changed files with 435 additions and 0 deletions

View File

@@ -0,0 +1,183 @@
package tools_test
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"strings"
"testing"
"gitea.d-ma.be/mathias/gitea-mcp/internal/allowlist"
"gitea.d-ma.be/mathias/gitea-mcp/internal/gitea"
"gitea.d-ma.be/mathias/gitea-mcp/internal/tools"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// buildDiff assembles a synthetic unified diff covering every path in files.
// Each file section consists of a "diff --git" header, the ---/+++ pair, one
// hunk header, and linesPerFile identical added lines of 12 bytes each.
func buildDiff(files []string, linesPerFile int) string {
	const addedLine = "+abcdefghij\n"

	var out strings.Builder
	for i := range files {
		name := files[i]
		out.WriteString(fmt.Sprintf("diff --git a/%s b/%s\n", name, name))
		out.WriteString(fmt.Sprintf("--- a/%s\n+++ b/%s\n", name, name))
		out.WriteString(fmt.Sprintf("@@ -0,0 +1,%d @@\n", linesPerFile))
		out.WriteString(strings.Repeat(addedLine, linesPerFile))
	}
	return out.String()
}
// buildFilesJSON returns a JSON array with one PullRequestFile-shaped object
// per entry in files. Every object has status "modified", the given additions
// count, and zero deletions.
//
// Filenames are encoded with encoding/json rather than fmt's %q verb: %q emits
// Go string-literal escapes (for example \x01 or \a) that are not valid JSON,
// so a filename containing such characters would produce an unparseable
// fixture. For ordinary names the output is unchanged.
func buildFilesJSON(files []string, additions int) string {
	// Field order here fixes the key order in the encoded objects.
	type prFile struct {
		Filename  string `json:"filename"`
		Status    string `json:"status"`
		Additions int    `json:"additions"`
		Deletions int    `json:"deletions"`
	}

	// Non-nil even when files is empty, so the result is "[]" and not "null".
	entries := make([]prFile, 0, len(files))
	for _, f := range files {
		entries = append(entries, prFile{Filename: f, Status: "modified", Additions: additions})
	}

	encoded, err := json.Marshal(entries)
	if err != nil {
		// Not expected for plain string/int fields; fail loudly in the test
		// helper rather than hand back a broken fixture.
		panic("buildFilesJSON: " + err.Error())
	}
	return string(encoded)
}
// newPRFilesDiffServer starts an httptest server for pull request 1 of repo
// o/r. It answers the /files endpoint with filesJSON (application/json) and
// the .diff endpoint with rawDiff (text/plain). Any other path is reported
// through t.Errorf and answered with 404. The caller must close the server.
func newPRFilesDiffServer(t *testing.T, filesJSON, rawDiff string) *httptest.Server {
	t.Helper()

	const (
		filesPath = "/api/v1/repos/o/r/pulls/1/files"
		diffPath  = "/api/v1/repos/o/r/pulls/1.diff"
	)

	handler := func(w http.ResponseWriter, r *http.Request) {
		// reply sets the content type and writes the body; the write error is
		// deliberately ignored, as there is nothing useful to do with it here.
		reply := func(contentType, body string) {
			w.Header().Set("Content-Type", contentType)
			_, _ = w.Write([]byte(body))
		}

		switch r.URL.Path {
		case filesPath:
			reply("application/json", filesJSON)
		case diffPath:
			reply("text/plain", rawDiff)
		default:
			t.Errorf("unexpected request: %s", r.URL.Path)
			w.WriteHeader(http.StatusNotFound)
		}
	}

	return httptest.NewServer(http.HandlerFunc(handler))
}
// TestPRFilesDiffSmall checks the no-truncation path: a PR with two small
// files must come back with both files present, their diffs non-empty and
// untruncated, the addition/deletion counts from the files listing, and
// nothing omitted at the response level.
func TestPRFilesDiffSmall(t *testing.T) {
	// Two files with 10 added lines each (~120 bytes of added lines per file),
	// well under the per-file and total caps.
	fileNames := []string{"main.go", "util.go"}
	rawDiff := buildDiff(fileNames, 10)
	filesJSON := buildFilesJSON(fileNames, 10)

	srv := newPRFilesDiffServer(t, filesJSON, rawDiff)
	defer srv.Close()

	tool := tools.NewPRFilesDiff(gitea.NewClient(srv.URL, "tok"), allowlist.New([]string{"o"}))
	result, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":1}`))
	require.NoError(t, err)

	var out struct {
		Files []struct {
			Path      string `json:"path"`
			Diff      string `json:"diff"`
			Truncated bool   `json:"truncated"`
			Additions int    `json:"additions"`
			Deletions int    `json:"deletions"`
		} `json:"files"`
		OmittedFiles      []string `json:"omitted_files"`
		ResponseTruncated bool     `json:"response_truncated"`
	}
	require.NoError(t, json.Unmarshal(result, &out))

	assert.Len(t, out.Files, 2)
	assert.Empty(t, out.OmittedFiles)
	assert.False(t, out.ResponseTruncated)

	// Collect paths while iterating instead of indexing out.Files[0] and
	// out.Files[1] directly: assert.Len does not stop the test, so a short
	// result would otherwise panic with an index-out-of-range instead of
	// producing a readable assertion failure.
	paths := make([]string, 0, len(out.Files))
	for _, f := range out.Files {
		assert.False(t, f.Truncated, "file %s should not be truncated", f.Path)
		assert.NotEmpty(t, f.Diff)
		assert.Equal(t, 10, f.Additions)
		assert.Equal(t, 0, f.Deletions)
		paths = append(paths, f.Path)
	}
	// The order of files in the response is not part of what is checked here.
	assert.ElementsMatch(t, fileNames, paths)
}
func TestPRFilesDiffPerFileTruncated(t *testing.T) {
// One file with a 30KB diff (each "+abcdefghij\n" = 12 bytes; 30KB / 12 ≈ 2560 lines).
fileNames := []string{"bigfile.go"}
linesPerFile := 2560 // ~30720 bytes > 20KB cap
rawDiff := buildDiff(fileNames, linesPerFile)
filesJSON := buildFilesJSON(fileNames, linesPerFile)
srv := newPRFilesDiffServer(t, filesJSON, rawDiff)
defer srv.Close()
tool := tools.NewPRFilesDiff(gitea.NewClient(srv.URL, "tok"), allowlist.New([]string{"o"}))
result, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":1}`))
require.NoError(t, err)
var out struct {
Files []struct {
Path string `json:"path"`
Diff string `json:"diff"`
Truncated bool `json:"truncated"`
OmittedLines int `json:"omitted_lines"`
Additions int `json:"additions"`
} `json:"files"`
ResponseTruncated bool `json:"response_truncated"`
}
require.NoError(t, json.Unmarshal(result, &out))
require.Len(t, out.Files, 1)
f := out.Files[0]
assert.Equal(t, "bigfile.go", f.Path)
assert.True(t, f.Truncated, "file should be truncated")
assert.Greater(t, f.OmittedLines, 0, "omitted_lines should be > 0")
assert.LessOrEqual(t, len(f.Diff), 20*1024+200, "diff should be capped near 20KB")
assert.False(t, out.ResponseTruncated)
}
func TestPRFilesDiffResponseCapped(t *testing.T) {
// 25 files × ~10KB diff each = ~250KB raw, well over the 200KB response cap.
// Each file: 850 lines × 12 bytes = 10200 bytes per file.
numFiles := 25
linesPerFile := 850
fileNames := make([]string, numFiles)
for i := range fileNames {
fileNames[i] = fmt.Sprintf("file%02d.go", i)
}
rawDiff := buildDiff(fileNames, linesPerFile)
filesJSON := buildFilesJSON(fileNames, linesPerFile)
srv := newPRFilesDiffServer(t, filesJSON, rawDiff)
defer srv.Close()
tool := tools.NewPRFilesDiff(gitea.NewClient(srv.URL, "tok"), allowlist.New([]string{"o"}))
result, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":1}`))
require.NoError(t, err)
var out struct {
Files []struct {
Path string `json:"path"`
} `json:"files"`
OmittedFiles []string `json:"omitted_files"`
ResponseTruncated bool `json:"response_truncated"`
}
require.NoError(t, json.Unmarshal(result, &out))
assert.True(t, out.ResponseTruncated, "response should be truncated")
assert.NotEmpty(t, out.OmittedFiles, "some files should be omitted")
assert.NotEmpty(t, out.Files, "some files should be included")
// Total files accounted for should equal numFiles.
totalAccountedFor := len(out.Files) + len(out.OmittedFiles)
assert.Equal(t, numFiles, totalAccountedFor)
}
func TestPRFilesDiffAllowlistRejects(t *testing.T) {
tool := tools.NewPRFilesDiff(gitea.NewClient("http://unused", ""), allowlist.New([]string{"allowed"}))
_, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"evil","name":"r","number":1}`))
require.Error(t, err)
}
func TestPRFilesDiffRequiresValidNumber(t *testing.T) {
tool := tools.NewPRFilesDiff(gitea.NewClient("http://unused", ""), allowlist.New([]string{"o"}))
_, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":0}`))
require.Error(t, err)
assert.ErrorIs(t, err, gitea.ErrValidation)
}