test(routing): de-flake TestRoutingPodEndToEnd
All checks were successful
CI / Lint / Test / Vet (push) Successful in 11s
CI / Mirror to GitHub (push) Successful in 4s

- Random port via net.Listen("tcp", "127.0.0.1:0") replaces hardcoded 33310
  (the fixed port was the primary failure mode under parallel test load).
- Bump waitForPort deadline 5s → 30s — `go build` under -race can exceed
  5s on a loaded machine.
- Replace osPath() (always fell through to its hardcoded "/usr/bin:/bin"
  fallback because exec.Command("env").Env is the *child's* still-unset Env
  field, not the parent's environment) with explicit PATH+HOME via
  os.Getenv. Don't inherit the full env: that would leak ROUTING_MCP_TOKEN
  from the parent shell and flip the routing pod into auth-required mode,
  breaking the test.

Closes #15. Verified: 10 cold-cache test runs pass, 3 consecutive task check
runs pass.
This commit is contained in:
Mathias
2026-05-18 20:00:18 +02:00
parent 937355cabe
commit fe18e4ee77

View File

@@ -4,9 +4,12 @@ import (
"context"
"encoding/json"
"io"
"net"
"net/http"
"net/http/httptest"
"os"
"os/exec"
"strconv"
"strings"
"testing"
"time"
@@ -42,28 +45,33 @@ func TestRoutingPodEndToEnd(t *testing.T) {
}))
defer brain.Close()
port := freePort(t)
addr := "127.0.0.1:" + port
baseURL := "http://" + addr
bin := buildRouting(t)
cmd := exec.Command(bin)
cmd.Env = append(cmd.Env,
"ROUTING_PORT=33310",
"LITELLM_BASE_URL="+llm.URL,
cmd.Env = []string{
"ROUTING_PORT=" + port,
"LITELLM_BASE_URL=" + llm.URL,
"LITELLM_API_KEY=stub",
"BRAIN_URL="+brain.URL,
"BRAIN_URL=" + brain.URL,
"SUPERVISOR_CONFIG_DIR=../../config/supervisor",
"PATH="+osPath(),
)
"PATH=" + os.Getenv("PATH"),
"HOME=" + os.Getenv("HOME"),
}
require.NoError(t, cmd.Start())
t.Cleanup(func() { _ = cmd.Process.Kill() })
require.NoError(t, waitForPort(t, "127.0.0.1:33310", 5*time.Second))
require.NoError(t, waitForPort(t, addr, 30*time.Second))
resp := mcpCall(t, "http://127.0.0.1:33310/mcp", `{"jsonrpc":"2.0","id":1,"method":"tools/list"}`)
resp := mcpCall(t, baseURL+"/mcp", `{"jsonrpc":"2.0","id":1,"method":"tools/list"}`)
assert.Contains(t, resp, `"review"`)
assert.Contains(t, resp, `"debug"`)
assert.Contains(t, resp, `"retrospective"`)
assert.Contains(t, resp, `"trainer"`)
resp = mcpCall(t, "http://127.0.0.1:33310/mcp", `{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"review","arguments":{"project_root":"/tmp","files":["README.md"]}}}`)
resp = mcpCall(t, baseURL+"/mcp", `{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"review","arguments":{"project_root":"/tmp","files":["README.md"]}}}`)
_ = resp // shape varies by skill; we only need a 200
// Wait briefly for the async session_log to land.
@@ -113,11 +121,15 @@ func mcpCall(t *testing.T, url, body string) string {
return string(raw)
}
func osPath() string {
for _, e := range append([]string{}, exec.Command("env").Env...) {
if strings.HasPrefix(e, "PATH=") {
return strings.TrimPrefix(e, "PATH=")
}
}
return "/usr/bin:/bin"
// freePort returns, as a decimal string, a TCP port number that the OS handed
// out for a loopback listener bound to port 0. The listener is closed again
// before returning, so the port is only *probably* still free when the caller
// (or a subprocess it spawns) binds it: another process could claim it in the
// gap. That small race is accepted here because the goal is merely to avoid a
// hardcoded port clashing with another test or a stray process.
//
// Any failure to open or close the listener fails the test immediately.
func freePort(t *testing.T) string {
	t.Helper()

	listener, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)

	// Read the assigned port while the listener is still open.
	tcpAddr := listener.Addr().(*net.TCPAddr)
	chosen := strconv.Itoa(tcpAddr.Port)

	// Release the port so the caller can bind it.
	require.NoError(t, listener.Close())
	return chosen
}