From fe18e4ee77190126d1fdf171956d500cf0585926 Mon Sep 17 00:00:00 2001 From: Mathias Date: Mon, 18 May 2026 20:00:18 +0200 Subject: [PATCH] test(routing): de-flake TestRoutingPodEndToEnd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Random port via net.Listen("tcp", "127.0.0.1:0") replaces hardcoded 33310 (was the primary failure mode under parallel test load). - Bump waitForPort deadline 5s → 30s — `go build` under -race can exceed 5s on a loaded machine. - Replace osPath() (always returned the hardcoded "/usr/bin:/bin" fallback, never the real PATH, because exec.Command("env").Env is the *child's* env — nil until explicitly set — not the parent's) with explicit PATH+HOME via os.Getenv. Don't inherit full env: would leak ROUTING_MCP_TOKEN from the parent shell and flip the routing pod into auth-required mode, breaking the test. Closes #15. Verified: 10 cold-cache test runs pass, 3 consecutive task check runs pass. --- cmd/routing/main_test.go | 44 +++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/cmd/routing/main_test.go b/cmd/routing/main_test.go index 407c4cd..20c3fc4 100644 --- a/cmd/routing/main_test.go +++ b/cmd/routing/main_test.go @@ -4,9 +4,12 @@ import ( "context" "encoding/json" "io" + "net" "net/http" "net/http/httptest" + "os" "os/exec" + "strconv" "strings" "testing" "time" @@ -42,28 +45,33 @@ func TestRoutingPodEndToEnd(t *testing.T) { })) defer brain.Close() + port := freePort(t) + addr := "127.0.0.1:" + port + baseURL := "http://" + addr + bin := buildRouting(t) cmd := exec.Command(bin) - cmd.Env = append(cmd.Env, - "ROUTING_PORT=33310", - "LITELLM_BASE_URL="+llm.URL, + cmd.Env = []string{ + "ROUTING_PORT=" + port, + "LITELLM_BASE_URL=" + llm.URL, "LITELLM_API_KEY=stub", - "BRAIN_URL="+brain.URL, + "BRAIN_URL=" + brain.URL, "SUPERVISOR_CONFIG_DIR=../../config/supervisor", - "PATH="+osPath(), - ) + "PATH=" + os.Getenv("PATH"), + "HOME=" + os.Getenv("HOME"), + } require.NoError(t, cmd.Start()) t.Cleanup(func() { _ = cmd.Process.Kill() }) - 
require.NoError(t, waitForPort(t, "127.0.0.1:33310", 5*time.Second)) + require.NoError(t, waitForPort(t, addr, 30*time.Second)) - resp := mcpCall(t, "http://127.0.0.1:33310/mcp", `{"jsonrpc":"2.0","id":1,"method":"tools/list"}`) + resp := mcpCall(t, baseURL+"/mcp", `{"jsonrpc":"2.0","id":1,"method":"tools/list"}`) assert.Contains(t, resp, `"review"`) assert.Contains(t, resp, `"debug"`) assert.Contains(t, resp, `"retrospective"`) assert.Contains(t, resp, `"trainer"`) - resp = mcpCall(t, "http://127.0.0.1:33310/mcp", `{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"review","arguments":{"project_root":"/tmp","files":["README.md"]}}}`) + resp = mcpCall(t, baseURL+"/mcp", `{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"review","arguments":{"project_root":"/tmp","files":["README.md"]}}}`) _ = resp // shape varies by skill; we only need a 200 // Wait briefly for the async session_log to land. @@ -113,11 +121,15 @@ func mcpCall(t *testing.T, url, body string) string { return string(raw) } -func osPath() string { - for _, e := range append([]string{}, exec.Command("env").Env...) { - if strings.HasPrefix(e, "PATH=") { - return strings.TrimPrefix(e, "PATH=") - } - } - return "/usr/bin:/bin" +// freePort grabs an OS-assigned TCP port and releases it. There is a small +// race window before the subprocess re-binds it, but it is acceptable for +// test isolation against a hardcoded port colliding with another test or +// stray process. +func freePort(t *testing.T) string { + t.Helper() + l, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + port := l.Addr().(*net.TCPAddr).Port + require.NoError(t, l.Close()) + return strconv.Itoa(port) }