feat(tools): pr_files_diff with caps
Returns per-file unified diff for a PR, capped at 20KB/file and 200KB total response. Files exceeding per-file cap report truncated+omitted_lines; files that would push the response over 200KB go to omitted_files. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -40,6 +40,7 @@ func main() {
|
||||
reg.Register(tools.NewIssueCreate(giteaClient, ownerAllow))
|
||||
reg.Register(tools.NewIssueComment(giteaClient, ownerAllow))
|
||||
reg.Register(tools.NewPRComment(giteaClient, ownerAllow))
|
||||
reg.Register(tools.NewPRFilesDiff(giteaClient, ownerAllow))
|
||||
|
||||
mcpSrv := mcp.NewServer(mcp.ServerOptions{
|
||||
Registry: reg,
|
||||
|
||||
@@ -64,3 +64,40 @@ func (c *Client) GetPullRequest(ctx context.Context, owner, repo string, index i
|
||||
}
|
||||
return &pr, nil
|
||||
}
|
||||
|
||||
// PullRequestFile is one element of the JSON array decoded by
// GetPullRequestFiles: a file touched by a pull request together with its
// change counters.
type PullRequestFile struct {
	// Filename is read from the "filename" key.
	Filename string `json:"filename"`
	// Status is read from the "status" key; expected values are
	// added | modified | deleted | renamed.
	Status string `json:"status"`
	// Additions is read from the "additions" key.
	Additions int `json:"additions"`
	// Deletions is read from the "deletions" key.
	Deletions int `json:"deletions"`
}
|
||||
|
||||
func (c *Client) GetPullRequestFiles(ctx context.Context, owner, repo string, index int) ([]PullRequestFile, error) {
|
||||
p := fmt.Sprintf("/api/v1/repos/%s/%s/pulls/%d/files", owner, repo, index)
|
||||
body, status, err := c.GetJSON(ctx, p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := MapStatus(status, body); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var files []PullRequestFile
|
||||
if err := json.Unmarshal(body, &files); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return files, nil
|
||||
}
|
||||
|
||||
// GetPullRequestDiff returns the raw unified diff. The endpoint serves text/plain, not JSON,
|
||||
// so we use doRaw to bypass the json Accept header expectation.
|
||||
func (c *Client) GetPullRequestDiff(ctx context.Context, owner, repo string, index int) ([]byte, error) {
|
||||
p := fmt.Sprintf("/api/v1/repos/%s/%s/pulls/%d.diff", owner, repo, index)
|
||||
resp, err := c.doRaw(ctx, "GET", p, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := MapStatus(resp.Status, resp.Body); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return resp.Body, nil
|
||||
}
|
||||
|
||||
@@ -93,3 +93,46 @@ func TestGetPullRequest(t *testing.T) {
|
||||
assert.Equal(t, "open", pr.State)
|
||||
assert.True(t, pr.Draft)
|
||||
}
|
||||
|
||||
func TestGetPullRequestFiles(t *testing.T) {
|
||||
filesJSON := `[
|
||||
{"filename":"main.go","status":"modified","additions":10,"deletions":5},
|
||||
{"filename":"README.md","status":"added","additions":20,"deletions":0}
|
||||
]`
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
assert.Equal(t, "/api/v1/repos/o/r/pulls/42/files", r.URL.Path)
|
||||
assert.Equal(t, http.MethodGet, r.Method)
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(filesJSON))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := gitea.NewClient(srv.URL, "tok")
|
||||
files, err := c.GetPullRequestFiles(context.Background(), "o", "r", 42)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, files, 2)
|
||||
assert.Equal(t, "main.go", files[0].Filename)
|
||||
assert.Equal(t, "modified", files[0].Status)
|
||||
assert.Equal(t, 10, files[0].Additions)
|
||||
assert.Equal(t, 5, files[0].Deletions)
|
||||
assert.Equal(t, "README.md", files[1].Filename)
|
||||
assert.Equal(t, "added", files[1].Status)
|
||||
assert.Equal(t, 20, files[1].Additions)
|
||||
assert.Equal(t, 0, files[1].Deletions)
|
||||
}
|
||||
|
||||
func TestGetPullRequestDiff(t *testing.T) {
|
||||
rawDiff := "diff --git a/main.go b/main.go\n--- a/main.go\n+++ b/main.go\n@@ -1,2 +1,3 @@\n+package main\n"
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
assert.Equal(t, "/api/v1/repos/o/r/pulls/42.diff", r.URL.Path)
|
||||
assert.Equal(t, http.MethodGet, r.Method)
|
||||
w.Header().Set("Content-Type", "text/plain")
|
||||
_, _ = w.Write([]byte(rawDiff))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := gitea.NewClient(srv.URL, "tok")
|
||||
diff, err := c.GetPullRequestDiff(context.Background(), "o", "r", 42)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, []byte(rawDiff), diff)
|
||||
}
|
||||
|
||||
171
internal/tools/pr_files_diff.go
Normal file
171
internal/tools/pr_files_diff.go
Normal file
@@ -0,0 +1,171 @@
|
||||
package tools
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"gitea.d-ma.be/mathias/gitea-mcp/internal/allowlist"
|
||||
"gitea.d-ma.be/mathias/gitea-mcp/internal/gitea"
|
||||
"gitea.d-ma.be/mathias/gitea-mcp/internal/registry"
|
||||
)
|
||||
|
||||
// Size caps applied by the pr_files_diff tool.
const (
	// maxFileDiffBytes is the largest diff, in bytes, returned for one file (20 KiB).
	maxFileDiffBytes = 20 << 10
	// maxResponseBytes is the ceiling, in bytes, on the running size estimate
	// of all returned entries (200 KiB).
	maxResponseBytes = 200 << 10
)
|
||||
|
||||
type PRFilesDiff struct {
|
||||
c *gitea.Client
|
||||
a *allowlist.Allowlist
|
||||
}
|
||||
|
||||
func NewPRFilesDiff(c *gitea.Client, a *allowlist.Allowlist) *PRFilesDiff {
|
||||
return &PRFilesDiff{c: c, a: a}
|
||||
}
|
||||
|
||||
func (t *PRFilesDiff) Descriptor() registry.ToolDescriptor {
|
||||
return registry.ToolDescriptor{
|
||||
Name: "pr_files_diff",
|
||||
Description: "Get a pull request's per-file diff with size caps (20KB/file, 200KB total).",
|
||||
InputSchema: json.RawMessage(`{
|
||||
"type":"object",
|
||||
"properties":{
|
||||
"owner":{"type":"string"},
|
||||
"name":{"type":"string"},
|
||||
"number":{"type":"integer","minimum":1}
|
||||
},
|
||||
"required":["owner","name","number"]
|
||||
}`),
|
||||
}
|
||||
}
|
||||
|
||||
// prFilesDiffArgs is the decoded argument object of a pr_files_diff call.
type prFilesDiffArgs struct {
	// Owner is the repository owner; Call checks it against the allowlist.
	Owner string `json:"owner"`
	// Name is the repository name.
	Name string `json:"name"`
	// Number is the pull request number; Call rejects values below 1.
	Number int `json:"number"`
}
|
||||
|
||||
// prFileDiffEntry is one element of the "files" array in the tool's response.
type prFileDiffEntry struct {
	// Path is the file name as reported by the PR file list.
	Path string `json:"path"`
	// Diff is this file's portion of the unified diff, possibly shortened.
	Diff string `json:"diff"`
	// Truncated is true when Diff was shortened to honour the per-file cap.
	Truncated bool `json:"truncated"`
	// OmittedLines counts the lines dropped by truncation; left out of the
	// JSON when zero.
	OmittedLines int `json:"omitted_lines,omitempty"`
	// Additions is copied from the PR file list.
	Additions int `json:"additions"`
	// Deletions is copied from the PR file list.
	Deletions int `json:"deletions"`
}
|
||||
|
||||
func (t *PRFilesDiff) Call(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) {
|
||||
var args prFilesDiffArgs
|
||||
if err := parseArgs(raw, &args); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := t.a.Check(args.Owner); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if args.Number < 1 {
|
||||
return nil, fmt.Errorf("number must be >= 1: %w", gitea.ErrValidation)
|
||||
}
|
||||
|
||||
files, err := t.c.GetPullRequestFiles(ctx, args.Owner, args.Name, args.Number)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rawDiff, err := t.c.GetPullRequestDiff(ctx, args.Owner, args.Name, args.Number)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Split unified diff by per-file headers ("diff --git a/path b/path")
|
||||
perFile := splitUnifiedDiff(rawDiff)
|
||||
|
||||
out := struct {
|
||||
Files []prFileDiffEntry `json:"files"`
|
||||
OmittedFiles []string `json:"omitted_files,omitempty"`
|
||||
ResponseTruncated bool `json:"response_truncated"`
|
||||
}{
|
||||
Files: make([]prFileDiffEntry, 0, len(files)),
|
||||
}
|
||||
|
||||
totalBytes := 0
|
||||
for _, f := range files {
|
||||
// look up the diff for this file (best-effort by path match)
|
||||
diffBytes, ok := perFile[f.Filename]
|
||||
if !ok {
|
||||
diffBytes = []byte{}
|
||||
}
|
||||
|
||||
entry := prFileDiffEntry{
|
||||
Path: f.Filename,
|
||||
Additions: f.Additions,
|
||||
Deletions: f.Deletions,
|
||||
}
|
||||
|
||||
// Per-file cap
|
||||
if len(diffBytes) > maxFileDiffBytes {
|
||||
truncated := diffBytes[:maxFileDiffBytes]
|
||||
omittedLines := bytes.Count(diffBytes[maxFileDiffBytes:], []byte("\n"))
|
||||
entry.Diff = string(truncated)
|
||||
entry.Truncated = true
|
||||
entry.OmittedLines = omittedLines
|
||||
} else {
|
||||
entry.Diff = string(diffBytes)
|
||||
}
|
||||
|
||||
// Response cap — if adding this entry would exceed, push to omitted_files
|
||||
entryEstimate := len(entry.Diff) + 200 // small overhead for path + counts
|
||||
if totalBytes+entryEstimate > maxResponseBytes {
|
||||
out.OmittedFiles = append(out.OmittedFiles, f.Filename)
|
||||
out.ResponseTruncated = true
|
||||
continue
|
||||
}
|
||||
totalBytes += entryEstimate
|
||||
out.Files = append(out.Files, entry)
|
||||
}
|
||||
|
||||
return textOK(out)
|
||||
}
|
||||
|
||||
// splitUnifiedDiff splits a multi-file unified diff into per-file sections and
// returns them keyed by file path. A section starts at a line beginning with
// "diff --git " and runs up to the next such line (or the end of the input);
// its bytes are copied verbatim, including line terminators.
//
// The key is the new ("b/") path of the header, so a renamed file is found
// under the name it has after the change. Sections whose header cannot be
// parsed (for example quoted paths) and any text before the first header are
// dropped. If the same path occurs twice, the later section wins.
func splitUnifiedDiff(d []byte) map[string][]byte {
	sections := map[string][]byte{}

	var (
		path    string       // key of the section being collected; "" means skip
		section bytes.Buffer // bytes of the section being collected
	)
	flush := func() {
		if path == "" {
			return
		}
		// Copy out: the buffer's backing array is reused after Reset.
		sections[path] = append([]byte(nil), section.Bytes()...)
		section.Reset()
	}

	// ReadBytes has no line-length limit, unlike bufio.Scanner, so one very
	// long line cannot silently end the parse early.
	r := bufio.NewReader(bytes.NewReader(d))
	for {
		line, err := r.ReadBytes('\n')
		if len(line) > 0 {
			text := strings.TrimRight(string(line), "\r\n")
			if strings.HasPrefix(text, "diff --git ") {
				flush()
				path = diffHeaderNewPath(text)
			}
			if path != "" {
				section.Write(line)
			}
		}
		if err != nil {
			// The only error a bytes.Reader produces is io.EOF.
			break
		}
	}
	flush()
	return sections
}

// diffHeaderNewPath extracts the new path from a header line of the form
// "diff --git a/<old> b/<new>". It returns "" when the line does not have
// that shape.
func diffHeaderNewPath(header string) string {
	const (
		prefix = "diff --git a/"
		sep    = " b/"
	)
	if !strings.HasPrefix(header, prefix) {
		return ""
	}
	rest := header[len(prefix):]

	// Common case: old and new path are identical, so the separator sits
	// exactly in the middle. Checking this first keeps paths that themselves
	// contain " b/" intact.
	if n := len(rest) - len(sep); n > 0 && n%2 == 0 {
		half := n / 2
		if rest[half:half+len(sep)] == sep && rest[:half] == rest[half+len(sep):] {
			return rest[:half]
		}
	}

	// Renames: the paths differ; take what follows the first separator.
	if i := strings.Index(rest, sep); i >= 0 {
		return rest[i+len(sep):]
	}
	return ""
}
|
||||
183
internal/tools/pr_files_diff_test.go
Normal file
183
internal/tools/pr_files_diff_test.go
Normal file
@@ -0,0 +1,183 @@
|
||||
package tools_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gitea.d-ma.be/mathias/gitea-mcp/internal/allowlist"
|
||||
"gitea.d-ma.be/mathias/gitea-mcp/internal/gitea"
|
||||
"gitea.d-ma.be/mathias/gitea-mcp/internal/tools"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// buildDiff assembles a synthetic unified diff covering the given files. Every
// file gets a "diff --git" header, the ---/+++ pair, one hunk header, and
// linesPerFile identical added lines.
func buildDiff(files []string, linesPerFile int) string {
	const addedLine = "+abcdefghij\n"
	hunkHeader := fmt.Sprintf("@@ -0,0 +1,%d @@\n", linesPerFile)

	var out strings.Builder
	for _, name := range files {
		out.WriteString("diff --git a/" + name + " b/" + name + "\n")
		out.WriteString("--- a/" + name + "\n+++ b/" + name + "\n")
		out.WriteString(hunkHeader)
		out.WriteString(strings.Repeat(addedLine, linesPerFile))
	}
	return out.String()
}
|
||||
|
||||
// buildFilesJSON renders the JSON array of PullRequestFile objects for the
// given file names. Every entry has status "modified", the supplied additions
// count and zero deletions.
func buildFilesJSON(files []string, additions int) string {
	var out strings.Builder
	out.WriteByte('[')
	for i, name := range files {
		if i > 0 {
			out.WriteByte(',')
		}
		fmt.Fprintf(&out, `{"filename":%q,"status":"modified","additions":%d,"deletions":0}`, name, additions)
	}
	out.WriteByte(']')
	return out.String()
}
|
||||
|
||||
// newPRFilesDiffServer starts a test server for PR 1 of o/r that answers the
// /files endpoint with filesJSON and the .diff endpoint with rawDiff. Any other
// path is reported as a test error and answered with 404.
func newPRFilesDiffServer(t *testing.T, filesJSON, rawDiff string) *httptest.Server {
	t.Helper()

	handler := func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/api/v1/repos/o/r/pulls/1/files":
			w.Header().Set("Content-Type", "application/json")
			_, _ = w.Write([]byte(filesJSON))
		case "/api/v1/repos/o/r/pulls/1.diff":
			w.Header().Set("Content-Type", "text/plain")
			_, _ = w.Write([]byte(rawDiff))
		default:
			t.Errorf("unexpected request: %s", r.URL.Path)
			w.WriteHeader(http.StatusNotFound)
		}
	}
	return httptest.NewServer(http.HandlerFunc(handler))
}
|
||||
|
||||
func TestPRFilesDiffSmall(t *testing.T) {
|
||||
// Two files, each ~120 bytes of diff — well under per-file and total caps.
|
||||
fileNames := []string{"main.go", "util.go"}
|
||||
// ~10 lines each = ~120 bytes per file diff
|
||||
rawDiff := buildDiff(fileNames, 10)
|
||||
filesJSON := buildFilesJSON(fileNames, 10)
|
||||
|
||||
srv := newPRFilesDiffServer(t, filesJSON, rawDiff)
|
||||
defer srv.Close()
|
||||
|
||||
tool := tools.NewPRFilesDiff(gitea.NewClient(srv.URL, "tok"), allowlist.New([]string{"o"}))
|
||||
result, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":1}`))
|
||||
require.NoError(t, err)
|
||||
|
||||
var out struct {
|
||||
Files []struct {
|
||||
Path string `json:"path"`
|
||||
Diff string `json:"diff"`
|
||||
Truncated bool `json:"truncated"`
|
||||
Additions int `json:"additions"`
|
||||
Deletions int `json:"deletions"`
|
||||
} `json:"files"`
|
||||
OmittedFiles []string `json:"omitted_files"`
|
||||
ResponseTruncated bool `json:"response_truncated"`
|
||||
}
|
||||
require.NoError(t, json.Unmarshal(result, &out))
|
||||
|
||||
assert.Len(t, out.Files, 2)
|
||||
assert.Empty(t, out.OmittedFiles)
|
||||
assert.False(t, out.ResponseTruncated)
|
||||
|
||||
for _, f := range out.Files {
|
||||
assert.False(t, f.Truncated, "file %s should not be truncated", f.Path)
|
||||
assert.NotEmpty(t, f.Diff)
|
||||
assert.Equal(t, 10, f.Additions)
|
||||
assert.Equal(t, 0, f.Deletions)
|
||||
}
|
||||
paths := []string{out.Files[0].Path, out.Files[1].Path}
|
||||
assert.ElementsMatch(t, fileNames, paths)
|
||||
}
|
||||
|
||||
func TestPRFilesDiffPerFileTruncated(t *testing.T) {
|
||||
// One file with a 30KB diff (each "+abcdefghij\n" = 12 bytes; 30KB / 12 ≈ 2560 lines).
|
||||
fileNames := []string{"bigfile.go"}
|
||||
linesPerFile := 2560 // ~30720 bytes > 20KB cap
|
||||
rawDiff := buildDiff(fileNames, linesPerFile)
|
||||
filesJSON := buildFilesJSON(fileNames, linesPerFile)
|
||||
|
||||
srv := newPRFilesDiffServer(t, filesJSON, rawDiff)
|
||||
defer srv.Close()
|
||||
|
||||
tool := tools.NewPRFilesDiff(gitea.NewClient(srv.URL, "tok"), allowlist.New([]string{"o"}))
|
||||
result, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":1}`))
|
||||
require.NoError(t, err)
|
||||
|
||||
var out struct {
|
||||
Files []struct {
|
||||
Path string `json:"path"`
|
||||
Diff string `json:"diff"`
|
||||
Truncated bool `json:"truncated"`
|
||||
OmittedLines int `json:"omitted_lines"`
|
||||
Additions int `json:"additions"`
|
||||
} `json:"files"`
|
||||
ResponseTruncated bool `json:"response_truncated"`
|
||||
}
|
||||
require.NoError(t, json.Unmarshal(result, &out))
|
||||
|
||||
require.Len(t, out.Files, 1)
|
||||
f := out.Files[0]
|
||||
assert.Equal(t, "bigfile.go", f.Path)
|
||||
assert.True(t, f.Truncated, "file should be truncated")
|
||||
assert.Greater(t, f.OmittedLines, 0, "omitted_lines should be > 0")
|
||||
assert.LessOrEqual(t, len(f.Diff), 20*1024+200, "diff should be capped near 20KB")
|
||||
assert.False(t, out.ResponseTruncated)
|
||||
}
|
||||
|
||||
func TestPRFilesDiffResponseCapped(t *testing.T) {
|
||||
// 25 files × ~10KB diff each = ~250KB raw, well over the 200KB response cap.
|
||||
// Each file: 850 lines × 12 bytes = 10200 bytes per file.
|
||||
numFiles := 25
|
||||
linesPerFile := 850
|
||||
fileNames := make([]string, numFiles)
|
||||
for i := range fileNames {
|
||||
fileNames[i] = fmt.Sprintf("file%02d.go", i)
|
||||
}
|
||||
rawDiff := buildDiff(fileNames, linesPerFile)
|
||||
filesJSON := buildFilesJSON(fileNames, linesPerFile)
|
||||
|
||||
srv := newPRFilesDiffServer(t, filesJSON, rawDiff)
|
||||
defer srv.Close()
|
||||
|
||||
tool := tools.NewPRFilesDiff(gitea.NewClient(srv.URL, "tok"), allowlist.New([]string{"o"}))
|
||||
result, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":1}`))
|
||||
require.NoError(t, err)
|
||||
|
||||
var out struct {
|
||||
Files []struct {
|
||||
Path string `json:"path"`
|
||||
} `json:"files"`
|
||||
OmittedFiles []string `json:"omitted_files"`
|
||||
ResponseTruncated bool `json:"response_truncated"`
|
||||
}
|
||||
require.NoError(t, json.Unmarshal(result, &out))
|
||||
|
||||
assert.True(t, out.ResponseTruncated, "response should be truncated")
|
||||
assert.NotEmpty(t, out.OmittedFiles, "some files should be omitted")
|
||||
assert.NotEmpty(t, out.Files, "some files should be included")
|
||||
|
||||
// Total files accounted for should equal numFiles.
|
||||
totalAccountedFor := len(out.Files) + len(out.OmittedFiles)
|
||||
assert.Equal(t, numFiles, totalAccountedFor)
|
||||
}
|
||||
|
||||
func TestPRFilesDiffAllowlistRejects(t *testing.T) {
|
||||
tool := tools.NewPRFilesDiff(gitea.NewClient("http://unused", ""), allowlist.New([]string{"allowed"}))
|
||||
_, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"evil","name":"r","number":1}`))
|
||||
require.Error(t, err)
|
||||
}
|
||||
|
||||
func TestPRFilesDiffRequiresValidNumber(t *testing.T) {
|
||||
tool := tools.NewPRFilesDiff(gitea.NewClient("http://unused", ""), allowlist.New([]string{"o"}))
|
||||
_, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":0}`))
|
||||
require.Error(t, err)
|
||||
assert.ErrorIs(t, err, gitea.ErrValidation)
|
||||
}
|
||||
Reference in New Issue
Block a user