Compare commits
22 Commits
23dd355b8a
...
v0.2.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
509c04b6e4 | ||
|
|
738275252c | ||
|
|
38fcac4cba | ||
|
|
7697e901d2 | ||
|
|
8cff57009a | ||
|
|
8fb44affef | ||
|
|
582ca5019b | ||
|
|
858a9ba1a1 | ||
|
|
cbef2da8de | ||
|
|
b493651c26 | ||
|
|
6169404f34 | ||
|
|
a67106026f | ||
|
|
99d523189f | ||
|
|
2d219760e5 | ||
|
|
4bf5edb78e | ||
|
|
98acf1c14e | ||
|
|
9741d8ba28 | ||
|
|
bf67299a48 | ||
|
|
24d9216474 | ||
|
|
344def20bb | ||
|
|
d084af1af0 | ||
|
|
e98bb2ba65 |
@@ -6,7 +6,7 @@
|
|||||||
},
|
},
|
||||||
"supervisor": {
|
"supervisor": {
|
||||||
"url": "http://localhost:3200/mcp",
|
"url": "http://localhost:3200/mcp",
|
||||||
"description": "Skill workers — TDD (red/green/refactor), more coming"
|
"description": "Hyperguild SDO — skill workers (tdd, retrospective), brain tools (brain_query, brain_write), session logging, tier detection"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,3 +6,12 @@ SUPERVISOR_MODELS_FILE=./config/models.yaml
|
|||||||
# LiteLLM gateway (iguana)
|
# LiteLLM gateway (iguana)
|
||||||
LITELLM_BASE_URL=http://iguana:4000
|
LITELLM_BASE_URL=http://iguana:4000
|
||||||
LITELLM_API_KEY=your-litellm-master-key
|
LITELLM_API_KEY=your-litellm-master-key
|
||||||
|
|
||||||
|
# Ingestion server
|
||||||
|
INGEST_BASE_URL=http://localhost:3300
|
||||||
|
INGEST_PORT=3300
|
||||||
|
INGEST_BRAIN_DIR=./brain
|
||||||
|
|
||||||
|
# Brain directories
|
||||||
|
SUPERVISOR_SESSIONS_DIR=./brain/sessions
|
||||||
|
SUPERVISOR_BRAIN_DIR=./brain
|
||||||
|
|||||||
58
.gitea/workflows/ci.yml
Normal file
58
.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
name: CI
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
tags: ["v*"]
|
||||||
|
pull_request:
|
||||||
|
branches: [main]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
# ── 1. Quality gate ─────────────────────────────────────────────────────────
|
||||||
|
check:
|
||||||
|
name: Lint / Test / Vet
|
||||||
|
runs-on: self-hosted
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version-file: go.mod
|
||||||
|
cache: false # self-hosted: Go cache persists on disk between runs
|
||||||
|
|
||||||
|
- name: Verify toolchain
|
||||||
|
run: |
|
||||||
|
go version
|
||||||
|
task --version
|
||||||
|
govulncheck -version 2>&1 || true
|
||||||
|
|
||||||
|
- name: Install golangci-lint
|
||||||
|
run: |
|
||||||
|
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/HEAD/install.sh \
|
||||||
|
| sh -s -- -b "$(go env GOPATH)/bin" v2.11.4
|
||||||
|
golangci-lint --version
|
||||||
|
|
||||||
|
- name: Run checks
|
||||||
|
run: task check
|
||||||
|
|
||||||
|
# ── 2. Mirror to GitHub ─────────────────────────────────────────────────────
|
||||||
|
mirror:
|
||||||
|
name: Mirror to GitHub
|
||||||
|
needs: check
|
||||||
|
runs-on: self-hosted
|
||||||
|
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Push to GitHub
|
||||||
|
run: |
|
||||||
|
mkdir -p ~/.ssh
|
||||||
|
echo '${{ secrets.GH_DEPLOY_KEY }}' > ~/.ssh/id_rsa_gh_mirror
|
||||||
|
chmod 600 ~/.ssh/id_rsa_gh_mirror
|
||||||
|
ssh-keyscan github.com >> ~/.ssh/known_hosts 2>/dev/null
|
||||||
|
GIT_SSH_COMMAND="ssh -i ~/.ssh/id_rsa_gh_mirror -o IdentitiesOnly=yes" \
|
||||||
|
git push git@github.com:mathiasb/hyperguild.git HEAD:main --tags
|
||||||
|
rm ~/.ssh/id_rsa_gh_mirror
|
||||||
|
echo "✓ Mirrored to GitHub"
|
||||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -44,3 +44,6 @@ secrets/
|
|||||||
# OS
|
# OS
|
||||||
.DS_Store
|
.DS_Store
|
||||||
Thumbs.db
|
Thumbs.db
|
||||||
|
|
||||||
|
# Brainstorm sessions
|
||||||
|
.superpowers/
|
||||||
|
|||||||
2
Procfile
Normal file
2
Procfile
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
ingestion: cd ingestion && INGEST_BRAIN_DIR=../brain INGEST_PORT=3300 go run ./cmd/server/
|
||||||
|
supervisor: SUPERVISOR_CONFIG_DIR=./config/supervisor SUPERVISOR_MODELS_FILE=./config/models.yaml SUPERVISOR_SESSIONS_DIR=./brain/sessions INGEST_BASE_URL=http://localhost:3300 go run ./cmd/supervisor/
|
||||||
165
README.md
165
README.md
@@ -1,98 +1,109 @@
|
|||||||
# Project template
|
# hyperguild
|
||||||
|
|
||||||
Harness-agnostic project scaffold using the Agent Skills open standard.
|
An MCP server that acts as a disciplined AI supervisor for Claude Code sessions.
|
||||||
|
Instead of letting Claude Code do whatever it wants, hyperguild enforces structured
|
||||||
|
workflows (TDD red/green/refactor), logs every session, and accumulates learnings
|
||||||
|
into a searchable brain.
|
||||||
|
|
||||||
## Quick start
|
## How it works
|
||||||
|
|
||||||
|
```
|
||||||
|
Your Claude Code session (in any project)
|
||||||
|
│
|
||||||
|
│ MCP tools (over stdio bridge → HTTP)
|
||||||
|
▼
|
||||||
|
supervisor :3200 — skill workers: tdd, retrospective
|
||||||
|
ingestion :3300 — brain HTTP API: query wiki, write notes
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
brain/
|
||||||
|
├── sessions/ — JSONL log, one file per session_id
|
||||||
|
├── wiki/ — searchable knowledge (full-text)
|
||||||
|
│ ├── concepts/
|
||||||
|
│ ├── entities/
|
||||||
|
│ └── sources/
|
||||||
|
├── raw/ — retrospective output, staged for review
|
||||||
|
└── training-data/ — SFT/DPO/RL data (Phase 2)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Phase 1 tools (available now)
|
||||||
|
|
||||||
|
| Tool | What it does |
|
||||||
|
|------|-------------|
|
||||||
|
| `tdd_red` | Writes a failing test for a spec, verifies it fails |
|
||||||
|
| `tdd_green` | Writes the minimal implementation to make tests pass |
|
||||||
|
| `tdd_refactor` | Cleans up implementation while keeping tests green |
|
||||||
|
| `session_log` | Appends a structured entry to the session JSONL log |
|
||||||
|
| `retrospective` | Reads the session log, identifies novel learnings, writes to brain/raw/ |
|
||||||
|
| `brain_query` | Full-text search over brain/wiki/ |
|
||||||
|
| `brain_write` | Writes a note to brain/raw/ (with optional YAML frontmatter) |
|
||||||
|
| `tier` | Returns the current connectivity tier (1=cloud, 2=LAN, 3=offline) |
|
||||||
|
|
||||||
|
## Start the servers
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
degit mathias/project-template my-new-project
|
# Requires goreman: go install github.com/mattn/goreman@latest
|
||||||
cd my-new-project
|
task start # starts ingestion (:3300) + supervisor (:3200) via goreman
|
||||||
task init
|
task stop # kills both by port
|
||||||
```
|
```
|
||||||
|
|
||||||
## Structure
|
## Connect a project
|
||||||
|
|
||||||
```
|
Create `.mcp.json` in your project root:
|
||||||
.context/
|
|
||||||
├── PROJECT.md ← Canonical project context (edit this)
|
|
||||||
├── mcp.json ← MCP server config (generated on first sync)
|
|
||||||
└── system-prompt.txt ← Generated: generic system prompt
|
|
||||||
|
|
||||||
.skills/
|
```json
|
||||||
├── go-patterns/
|
{
|
||||||
│ └── SKILL.md ← Agent Skills standard format
|
"mcpServers": {
|
||||||
└── htmx-patterns/
|
"supervisor": {
|
||||||
└── SKILL.md
|
"command": "/Users/mathias/dev/AI/supervisor/bin/supervisor-bridge",
|
||||||
|
"env": {
|
||||||
scripts/
|
"SUPERVISOR_URL": "http://localhost:3200/mcp"
|
||||||
└── context-sync.sh ← Adapter generator (finds root AGENT.md automatically)
|
}
|
||||||
|
}
|
||||||
Taskfile.yml ← Task runner config
|
}
|
||||||
DECISIONS.md ← Why things are the way they are
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
## Generated files (gitignored)
|
Build the bridge binary once: `task bridge:build`
|
||||||
|
|
||||||
| File | Consumer | Notes |
|
Then open Claude Code in your project — run `/mcp` to confirm `supervisor` is listed.
|
||||||
|------|----------|-------|
|
|
||||||
| `AGENTS.md` | Crush, Pi, Antigravity | Root + project concatenated |
|
|
||||||
| `CLAUDE.md` | Claude Code | Project-only (inherits root via tree walk) |
|
|
||||||
| `.cursorrules` | Cursor | Root + project concatenated |
|
|
||||||
| `.aider.conventions.md` | Aider | Root + project concatenated |
|
|
||||||
| `.context/system-prompt.txt` | Open WebUI, Mods, generic | Root + project concatenated |
|
|
||||||
|
|
||||||
## How root context works
|
## A typical TDD session
|
||||||
|
|
||||||
The script walks up from the project directory looking for `~/dev/.context/AGENT.md`.
|
```
|
||||||
|
1. Call tdd_red → spec in, failing test file out
|
||||||
- **Claude Code**: inherits natively (reads every `CLAUDE.md` up the tree) → project CLAUDE.md is project-only
|
2. Call tdd_green → test path in, implementation out
|
||||||
- **Everything else**: can't walk the tree → script concatenates root + project into each generated file
|
3. Call tdd_refactor → impl + test in, cleaned code out
|
||||||
|
4. Call session_log → log each phase result
|
||||||
## Skills
|
5. Call retrospective → extracts learnings → brain/raw/
|
||||||
|
6. Review brain/raw/, move worthy notes to brain/wiki/concepts/
|
||||||
Skills use the [Agent Skills open standard](https://github.com/badlogic/pi-skills). Each skill is a folder with a `SKILL.md` containing frontmatter:
|
7. Future sessions: call brain_query to retrieve relevant context
|
||||||
|
|
||||||
```yaml
|
|
||||||
---
|
|
||||||
name: my-skill
|
|
||||||
description: What this skill does. When to use it.
|
|
||||||
---
|
|
||||||
# Instructions here
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Supported natively by Claude Code, Pi, Crush, and Antigravity. No adapter needed for skills.
|
## Tier detection
|
||||||
|
|
||||||
### Adding a skill
|
The supervisor probes connectivity at call time:
|
||||||
|
|
||||||
```bash
|
| Tier | Label | Condition |
|
||||||
mkdir .skills/my-new-skill
|
|------|-------|-----------|
|
||||||
# Create .skills/my-new-skill/SKILL.md with frontmatter + instructions
|
| 1 | full-online | Can reach api.anthropic.com |
|
||||||
```
|
| 2 | lan-only | Can reach LiteLLM but not Anthropic |
|
||||||
|
| 3 | airplane | No external connectivity |
|
||||||
|
|
||||||
### Using pi-skills (cross-compatible)
|
## Key env vars
|
||||||
|
|
||||||
```bash
|
| Variable | Default | Purpose |
|
||||||
# User-level (all projects)
|
|----------|---------|---------|
|
||||||
git clone https://github.com/badlogic/pi-skills ~/.pi/agent/skills/pi-skills
|
| `INGEST_BRAIN_DIR` | `../brain` | Brain directory for ingestion server |
|
||||||
|
| `INGEST_PORT` | `3300` | Ingestion server port |
|
||||||
|
| `SUPERVISOR_CONFIG_DIR` | `./config/supervisor` | Skill discipline files |
|
||||||
|
| `SUPERVISOR_SESSIONS_DIR` | `./brain/sessions` | JSONL session logs |
|
||||||
|
| `INGEST_BASE_URL` | `http://localhost:3300` | Supervisor → ingestion |
|
||||||
|
| `LITELLM_BASE_URL` | — | LiteLLM proxy for Tier 2 model routing |
|
||||||
|
|
||||||
# Symlink for Claude Code
|
## Phase 2 (planned)
|
||||||
ln -s ~/.pi/agent/skills/pi-skills/brave-search ~/.claude/skills/brave-search
|
|
||||||
```
|
|
||||||
|
|
||||||
## Usage with specific tools
|
- `review` skill — structured code review with iron law enforcement
|
||||||
|
- `debug` skill — hypothesis-driven debugging sessions
|
||||||
**Claude Code**: `task context:sync:claude` → reads `CLAUDE.md` + discovers `.skills/*/SKILL.md`
|
- `spec` skill — generates specs from conversations
|
||||||
|
- `trainer` — extracts SFT/DPO pairs from session logs for fine-tuning
|
||||||
**Crush**: `task context:sync:agents` → reads `AGENTS.md` + discovers `.skills/*/SKILL.md`
|
|
||||||
|
|
||||||
**Pi**: `task context:sync:agents` → reads `AGENTS.md` + discovers `.skills/*/SKILL.md` (or symlink `.skills/` to `.pi/skills/`)
|
|
||||||
|
|
||||||
**Antigravity**: `task context:sync:agents` → reads `AGENTS.md` + discovers `.skills/*/SKILL.md`
|
|
||||||
|
|
||||||
**Cursor**: `task context:sync:cursor` → reads `.cursorrules`
|
|
||||||
|
|
||||||
**Mistral Vibe**: Run root-level `task context:sync:vibe` once → `vibe --agent mathias`
|
|
||||||
|
|
||||||
**Open WebUI / Mods**: Copy `.context/system-prompt.txt` into a preset or pipe it
|
|
||||||
|
|
||||||
**Any other tool**: Point at `.context/PROJECT.md` directly — it's human-readable markdown
|
|
||||||
|
|||||||
126
Taskfile.yml
126
Taskfile.yml
@@ -1,7 +1,11 @@
|
|||||||
version: '3'
|
version: '3'
|
||||||
|
|
||||||
vars:
|
vars:
|
||||||
PROJECT_NAME: '{{.PROJECT_NAME | default "myproject"}}'
|
PROJECT_NAME: hyperguild
|
||||||
|
VERSION:
|
||||||
|
sh: git describe --tags --always --dirty 2>/dev/null || echo "dev"
|
||||||
|
SHORT_SHA:
|
||||||
|
sh: git rev-parse --short HEAD
|
||||||
|
|
||||||
tasks:
|
tasks:
|
||||||
context:sync:
|
context:sync:
|
||||||
@@ -19,57 +23,109 @@ tasks:
|
|||||||
context:sync:cursor:
|
context:sync:cursor:
|
||||||
cmds: [bash scripts/context-sync.sh cursor]
|
cmds: [bash scripts/context-sync.sh cursor]
|
||||||
|
|
||||||
dev:
|
# ── Development ────────────────────────────────────────────────────────────
|
||||||
desc: Start development server
|
|
||||||
cmds:
|
|
||||||
- go run ./cmd/server
|
|
||||||
|
|
||||||
build:
|
start:
|
||||||
desc: Build the binary
|
desc: Start ingestion + supervisor (requires goreman — go install github.com/mattn/goreman@latest)
|
||||||
cmds:
|
cmds:
|
||||||
- go build -o bin/{{.PROJECT_NAME}} ./cmd/server
|
- goreman start
|
||||||
|
|
||||||
check:
|
stop:
|
||||||
desc: Run all checks (lint + test + vet)
|
desc: Stop all hyperguild processes (Ctrl-C in the goreman terminal, or kill by port)
|
||||||
cmds:
|
cmds:
|
||||||
- task: lint
|
- lsof -ti:3300 | xargs kill -9 2>/dev/null || true
|
||||||
- task: test
|
- lsof -ti:3200 | xargs kill -9 2>/dev/null || true
|
||||||
- task: vet
|
- echo "hyperguild stopped"
|
||||||
|
|
||||||
lint:
|
|
||||||
cmds: [golangci-lint run ./...]
|
|
||||||
test:
|
|
||||||
cmds: [go test -race -count=1 ./...]
|
|
||||||
vet:
|
|
||||||
cmds:
|
|
||||||
- go vet ./...
|
|
||||||
- govulncheck ./... || true
|
|
||||||
|
|
||||||
up:
|
|
||||||
desc: Start containers
|
|
||||||
cmds: [docker compose up -d]
|
|
||||||
down:
|
|
||||||
cmds: [docker compose down]
|
|
||||||
|
|
||||||
init:
|
|
||||||
desc: Initialize a new project from this template
|
|
||||||
cmds:
|
|
||||||
- bash scripts/init.sh
|
|
||||||
|
|
||||||
supervisor:dev:
|
supervisor:dev:
|
||||||
desc: Run supervisor MCP server (development)
|
desc: Run supervisor MCP server (development)
|
||||||
cmds:
|
cmds:
|
||||||
- go run ./cmd/supervisor
|
- go run ./cmd/supervisor
|
||||||
|
|
||||||
|
ingestion:dev:
|
||||||
|
desc: Run ingestion server in development mode
|
||||||
|
dir: ingestion
|
||||||
|
env:
|
||||||
|
INGEST_BRAIN_DIR: "{{.ROOT_DIR}}/brain"
|
||||||
|
INGEST_PORT: "3300"
|
||||||
|
cmds:
|
||||||
|
- go run ./cmd/server
|
||||||
|
|
||||||
|
# ── Build ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
build:
|
||||||
|
desc: Build all binaries
|
||||||
|
cmds:
|
||||||
|
- task: supervisor:build
|
||||||
|
- task: bridge:build
|
||||||
|
- task: ingestion:build
|
||||||
|
|
||||||
supervisor:build:
|
supervisor:build:
|
||||||
desc: Build supervisor binary
|
desc: Build supervisor binary
|
||||||
cmds:
|
cmds:
|
||||||
- go build -o bin/supervisor ./cmd/supervisor
|
- go build -trimpath -ldflags="-s -w -X main.version={{.VERSION}}" -o bin/supervisor ./cmd/supervisor
|
||||||
|
|
||||||
|
bridge:build:
|
||||||
|
desc: Build stdio↔HTTP bridge for Claude Code MCP integration
|
||||||
|
cmds:
|
||||||
|
- go build -trimpath -ldflags="-s -w" -o bin/supervisor-bridge ./cmd/bridge
|
||||||
|
|
||||||
|
ingestion:build:
|
||||||
|
desc: Build ingestion server binary
|
||||||
|
dir: ingestion
|
||||||
|
cmds:
|
||||||
|
- go build -trimpath -ldflags="-s -w" -o ../bin/ingestion-server ./cmd/server
|
||||||
|
|
||||||
|
# ── Quality ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
check:
|
||||||
|
desc: Run all checks (lint + test + vet) across all modules
|
||||||
|
cmds:
|
||||||
|
- task: lint
|
||||||
|
- task: test
|
||||||
|
- task: vet
|
||||||
|
|
||||||
|
lint:
|
||||||
|
cmds:
|
||||||
|
- golangci-lint run ./...
|
||||||
|
- cd ingestion && golangci-lint run ./...
|
||||||
|
|
||||||
|
test:
|
||||||
|
cmds:
|
||||||
|
- go test -race -count=1 ./...
|
||||||
|
- cd ingestion && go test -race -count=1 ./...
|
||||||
|
|
||||||
|
vet:
|
||||||
|
cmds:
|
||||||
|
- go vet ./...
|
||||||
|
- govulncheck ./... || true
|
||||||
|
- cd ingestion && go vet ./...
|
||||||
|
|
||||||
supervisor:test:smoke:
|
supervisor:test:smoke:
|
||||||
desc: Smoke test supervisor via MCP (requires supervisor:dev running)
|
desc: Smoke test supervisor via MCP (requires start running)
|
||||||
cmds:
|
cmds:
|
||||||
- |
|
- |
|
||||||
curl -s -X POST http://localhost:${SUPERVISOR_PORT:-3200}/mcp \
|
curl -s -X POST http://localhost:${SUPERVISOR_PORT:-3200}/mcp \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-d '{"jsonrpc":"2.0","id":1,"method":"tools/list","params":{}}' | jq .
|
-d '{"jsonrpc":"2.0","id":1,"method":"tools/list","params":{}}' | jq .
|
||||||
|
|
||||||
|
# ── Git / Release ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
tag:
|
||||||
|
desc: Create and push a semver tag (usage — task tag version=v1.2.3)
|
||||||
|
preconditions:
|
||||||
|
- sh: '[[ "{{.version}}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.]+)?$ ]]'
|
||||||
|
msg: "version must be semver, e.g. v1.2.3 or v1.2.3-rc.1"
|
||||||
|
- sh: "git diff --quiet && git diff --cached --quiet"
|
||||||
|
msg: "working tree must be clean before tagging"
|
||||||
|
cmds:
|
||||||
|
- git tag -a {{.version}} -m "Release {{.version}}"
|
||||||
|
- git push origin {{.version}}
|
||||||
|
|
||||||
|
push:
|
||||||
|
desc: Push current branch and tags to origin
|
||||||
|
vars:
|
||||||
|
BRANCH:
|
||||||
|
sh: git rev-parse --abbrev-ref HEAD
|
||||||
|
cmds:
|
||||||
|
- git push origin {{.BRANCH}} --tags
|
||||||
|
|||||||
3
brain/raw/tdd-pattern-test.md
Normal file
3
brain/raw/tdd-pattern-test.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
# TDD Pattern
|
||||||
|
|
||||||
|
Always write the failing test first.
|
||||||
3
brain/wiki/concepts/tdd-pattern-test.md
Normal file
3
brain/wiki/concepts/tdd-pattern-test.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
# TDD Pattern
|
||||||
|
|
||||||
|
Always write the failing test first.
|
||||||
59
cmd/bridge/main.go
Normal file
59
cmd/bridge/main.go
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
// bridge is a stdio↔HTTP adapter that lets Claude Code connect to the
|
||||||
|
// supervisor MCP server via the stdio transport.
|
||||||
|
//
|
||||||
|
// Claude Code spawns this binary as a subprocess and communicates over
|
||||||
|
// stdin/stdout. Each newline-delimited JSON-RPC message from stdin is
|
||||||
|
// forwarded to the supervisor HTTP server and the response is written back.
|
||||||
|
//
|
||||||
|
// Usage:
|
||||||
|
//
|
||||||
|
// SUPERVISOR_URL=http://localhost:3200/mcp bridge
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
// main runs the stdio↔HTTP bridge loop.
//
// It reads newline-delimited JSON-RPC messages from stdin, POSTs each one to
// the supervisor MCP endpoint, and writes every non-empty response to stdout
// as exactly one line. The endpoint is read from SUPERVISOR_URL and falls back
// to http://localhost:3200/mcp. Diagnostics go to stderr only, because stdout
// is the protocol channel back to the client.
//
// The process exits with status 1 if stdin cannot be scanned or stdout cannot
// be written.
func main() {
	url := os.Getenv("SUPERVISOR_URL")
	if url == "" {
		url = "http://localhost:3200/mcp"
	}

	// NOTE(review): no client timeout is configured, so a hung supervisor
	// blocks the bridge indefinitely. Skill calls may be long-running, so
	// confirm an acceptable upper bound before adding one.
	client := &http.Client{}

	scanner := bufio.NewScanner(os.Stdin)
	// Accept messages up to roughly 1 MiB; a longer line stops the loop and is
	// reported through scanner.Err below.
	scanner.Buffer(make([]byte, 1024*1024), 1024*1024)

	for scanner.Scan() {
		line := scanner.Bytes()
		if len(bytes.TrimSpace(line)) == 0 {
			continue
		}

		body, err := forward(client, url, line)
		if err != nil {
			fmt.Fprintf(os.Stderr, "bridge: %v\n", err)
			continue
		}
		if len(body) == 0 {
			// The server sent no payload; emitting a bare newline would put
			// a non-message line on the protocol channel.
			continue
		}
		if _, err := os.Stdout.Write(append(body, '\n')); err != nil {
			// stdout is the only way back to the client; once it is broken
			// there is nothing useful left to do.
			fmt.Fprintf(os.Stderr, "bridge: write response: %v\n", err)
			os.Exit(1)
		}
	}

	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "bridge: scanner: %v\n", err)
		os.Exit(1)
	}
}

// forward POSTs msg to url as application/json and returns the response body
// flattened to a single line (see singleLine). A non-2xx status is reported on
// stderr but the body is still returned, since it may carry an error payload
// the client is waiting for.
func forward(client *http.Client, url string, msg []byte) ([]byte, error) {
	req, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(msg))
	if err != nil {
		return nil, fmt.Errorf("build request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("request failed: %w", err)
	}
	// Closing per call (rather than in main's loop) releases each response
	// before the next message is read.
	defer func() { _ = resp.Body.Close() }()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read response: %w", err)
	}
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		fmt.Fprintf(os.Stderr, "bridge: supervisor returned %s\n", resp.Status)
	}
	return singleLine(body), nil
}

// singleLine returns a copy of body with surrounding whitespace removed and
// every raw CR or LF byte replaced by a space, so the result can be framed as
// one newline-delimited message. In valid JSON a raw line break can only occur
// between tokens (inside strings it must be escaped), so the replacement does
// not change the meaning of a JSON payload.
func singleLine(body []byte) []byte {
	out := append([]byte(nil), bytes.TrimSpace(body)...)
	for i, c := range out {
		if c == '\n' || c == '\r' {
			out[i] = ' '
		}
	}
	return out
}
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
@@ -9,7 +10,16 @@ import (
|
|||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
"github.com/mathiasbq/supervisor/internal/mcp"
|
"github.com/mathiasbq/supervisor/internal/mcp"
|
||||||
"github.com/mathiasbq/supervisor/internal/registry"
|
"github.com/mathiasbq/supervisor/internal/registry"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/skills/brain"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/skills/org"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/skills/retrospective"
|
||||||
|
skilldebug "github.com/mathiasbq/supervisor/internal/skills/debug"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/skills/review"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/skills/spec"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/skills/trainer"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/skills/sessionlog"
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/tdd"
|
"github.com/mathiasbq/supervisor/internal/skills/tdd"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/tier"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -39,18 +49,99 @@ func main() {
|
|||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
retroPrompt, err := os.ReadFile(cfg.ConfigDir + "/retrospective.md")
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("read retrospective.md", "path", cfg.ConfigDir+"/retrospective.md", "err", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
reviewPrompt, err := os.ReadFile(cfg.ConfigDir + "/review.md")
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("read review.md", "path", cfg.ConfigDir+"/review.md", "err", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
debugPrompt, err := os.ReadFile(cfg.ConfigDir + "/debug.md")
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("read debug.md", "path", cfg.ConfigDir+"/debug.md", "err", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
specPrompt, err := os.ReadFile(cfg.ConfigDir + "/spec.md")
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("read spec.md", "path", cfg.ConfigDir+"/spec.md", "err", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
trainerReaderPrompt, err := os.ReadFile(cfg.ConfigDir + "/trainer-reader.md")
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("read trainer-reader.md", "path", cfg.ConfigDir+"/trainer-reader.md", "err", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
trainerWriterPrompt, err := os.ReadFile(cfg.ConfigDir + "/trainer-writer.md")
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("read trainer-writer.md", "path", cfg.ConfigDir+"/trainer-writer.md", "err", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
executor := iexec.New(iexec.Config{
|
executor := iexec.New(iexec.Config{
|
||||||
SystemPrompt: string(systemPrompt),
|
SystemPrompt: string(systemPrompt),
|
||||||
LiteLLMBaseURL: cfg.LiteLLMBaseURL,
|
LiteLLMBaseURL: cfg.LiteLLMBaseURL,
|
||||||
LiteLLMAPIKey: cfg.LiteLLMAPIKey,
|
LiteLLMAPIKey: cfg.LiteLLMAPIKey,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
tierFn := func(ctx context.Context) tier.Info {
|
||||||
|
return tier.Detect(ctx, "https://api.anthropic.com", cfg.LiteLLMBaseURL)
|
||||||
|
}
|
||||||
|
|
||||||
reg := registry.New()
|
reg := registry.New()
|
||||||
reg.Register(tdd.New(tdd.Config{
|
reg.Register(tdd.New(tdd.Config{
|
||||||
SystemPrompt: string(systemPrompt),
|
SystemPrompt: string(systemPrompt),
|
||||||
SkillPrompt: string(tddPrompt),
|
SkillPrompt: string(tddPrompt),
|
||||||
DefaultModel: models.Resolve("tdd", ""),
|
DefaultModel: models.Resolve("tdd", ""),
|
||||||
ExecutorFn: executor.Run,
|
ExecutorFn: executor.Run,
|
||||||
|
SessionsDir: cfg.SessionsDir,
|
||||||
|
}))
|
||||||
|
reg.Register(brain.New(brain.Config{
|
||||||
|
IngestBaseURL: cfg.IngestBaseURL,
|
||||||
|
}))
|
||||||
|
reg.Register(org.New(org.Config{
|
||||||
|
TierFn: tierFn,
|
||||||
|
}))
|
||||||
|
reg.Register(sessionlog.New(sessionlog.Config{
|
||||||
|
SessionsDir: cfg.SessionsDir,
|
||||||
|
}))
|
||||||
|
reg.Register(retrospective.New(retrospective.Config{
|
||||||
|
SkillPrompt: string(retroPrompt),
|
||||||
|
DefaultModel: models.Resolve("retrospective", ""),
|
||||||
|
SessionsDir: cfg.SessionsDir,
|
||||||
|
ExecutorFn: executor.Run,
|
||||||
|
}))
|
||||||
|
reg.Register(review.New(review.Config{
|
||||||
|
SkillPrompt: string(reviewPrompt),
|
||||||
|
DefaultModel: models.Resolve("review", ""),
|
||||||
|
ExecutorFn: executor.Run,
|
||||||
|
SessionsDir: cfg.SessionsDir,
|
||||||
|
}))
|
||||||
|
reg.Register(skilldebug.New(skilldebug.Config{
|
||||||
|
SkillPrompt: string(debugPrompt),
|
||||||
|
DefaultModel: models.Resolve("debug", ""),
|
||||||
|
ExecutorFn: executor.Run,
|
||||||
|
SessionsDir: cfg.SessionsDir,
|
||||||
|
}))
|
||||||
|
reg.Register(spec.New(spec.Config{
|
||||||
|
SkillPrompt: string(specPrompt),
|
||||||
|
DefaultModel: models.Resolve("spec", ""),
|
||||||
|
ExecutorFn: executor.Run,
|
||||||
|
SessionsDir: cfg.SessionsDir,
|
||||||
|
}))
|
||||||
|
reg.Register(trainer.New(trainer.Config{
|
||||||
|
ReaderPrompt: string(trainerReaderPrompt),
|
||||||
|
WriterPrompt: string(trainerWriterPrompt),
|
||||||
|
DefaultModel: models.Resolve("trainer", ""),
|
||||||
|
ExecutorFn: executor.Run,
|
||||||
|
SessionsDir: cfg.SessionsDir,
|
||||||
|
BrainDir: cfg.BrainDir,
|
||||||
}))
|
}))
|
||||||
|
|
||||||
srv := mcp.NewServer(reg)
|
srv := mcp.NewServer(reg)
|
||||||
|
|||||||
@@ -5,6 +5,9 @@
|
|||||||
default: ollama/qwen3-coder-30b-tuned
|
default: ollama/qwen3-coder-30b-tuned
|
||||||
|
|
||||||
skills:
|
skills:
|
||||||
tdd: ollama/qwen3-coder-30b-tuned
|
tdd: ollama/qwen3-coder-30b-tuned
|
||||||
review: ollama/devstral-tuned
|
review: ollama/devstral-tuned
|
||||||
debug: ollama/deepseek-r1-tuned
|
debug: ollama/deepseek-r1-tuned
|
||||||
|
retrospective: ollama/qwen3-coder-30b-tuned
|
||||||
|
spec: ollama/qwen3-coder-30b-tuned
|
||||||
|
trainer: ollama/qwen3-coder-30b-tuned
|
||||||
|
|||||||
31
config/supervisor/debug.md
Normal file
31
config/supervisor/debug.md
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
# Debug Discipline
|
||||||
|
|
||||||
|
You are a systematic debugger. Form hypotheses before suggesting fixes.
|
||||||
|
|
||||||
|
## Iron laws
|
||||||
|
1. Never suggest "try X and see what happens" — every hypothesis must have a specific expected outcome if correct
|
||||||
|
2. Generate between 3 and 5 hypotheses, ordered by likelihood (most likely first)
|
||||||
|
3. Never fix the bug — diagnose only; the caller decides what to do with the hypotheses
|
||||||
|
|
||||||
|
## Output contract
|
||||||
|
Return JSON result with:
|
||||||
|
- `status`: "pass" (hypotheses generated) or "error" (error too ambiguous to analyse)
|
||||||
|
- `phase`: "debug"
|
||||||
|
- `skill`: "debug"
|
||||||
|
- `file_path`: the file most relevant to the error (read it)
|
||||||
|
- `runner_output`: your hypotheses, formatted as:
|
||||||
|
```
|
||||||
|
HYPOTHESIS 1 (likelihood: high): <mechanism>
|
||||||
|
VERIFY: <exact command or file to check> → expected if correct: <specific output>
|
||||||
|
|
||||||
|
HYPOTHESIS 2 (likelihood: medium): <mechanism>
|
||||||
|
VERIFY: <exact command or file to check> → expected if correct: <specific output>
|
||||||
|
```
|
||||||
|
- `verified`: false — verification is the caller's job
|
||||||
|
- `message`: "N hypotheses for: <one-line error summary>"
|
||||||
|
|
||||||
|
## Rules
|
||||||
|
1. Read the error and any context files provided before forming hypotheses
|
||||||
|
2. Identify the failure mode first — what actually went wrong, not just what the error says
|
||||||
|
3. For each hypothesis: name the mechanism, explain why it would produce this exact error, give a concrete verification command with expected output
|
||||||
|
4. If the error is clearly a typo or trivial mistake, still form 3 hypotheses — surface the most likely cause as #1
|
||||||
30
config/supervisor/review.md
Normal file
30
config/supervisor/review.md
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
# Code Review Discipline
|
||||||
|
|
||||||
|
You are a disciplined code reviewer. Read files carefully before commenting.
|
||||||
|
|
||||||
|
## Iron laws
|
||||||
|
1. Never approve security vulnerabilities: command injection, SQL injection, credential exposure, path traversal, unchecked input at system boundaries
|
||||||
|
2. Never approve silently swallowed errors — `err != nil` without wrapping or handling is always wrong
|
||||||
|
3. Never approve missing validation at system boundaries (user input, external APIs, file reads)
|
||||||
|
|
||||||
|
## Output contract
|
||||||
|
Return JSON result with:
|
||||||
|
- `status`: "pass" if no blocking issues; "fail" if any iron law is violated
|
||||||
|
- `phase`: "review"
|
||||||
|
- `skill`: "review"
|
||||||
|
- `file_path`: first file reviewed
|
||||||
|
- `runner_output`: full review formatted as:
|
||||||
|
```
|
||||||
|
CRITICAL: <issue> at <file>:<line>
|
||||||
|
WARNING: <issue> at <file>:<line>
|
||||||
|
SUGGESTION: <issue> at <file>:<line>
|
||||||
|
```
|
||||||
|
- `verified`: true if you read all specified files; false if any were missing or unreadable
|
||||||
|
- `message`: "N critical, M warnings, K suggestions" or "clean: <which iron law checks passed and why>"
|
||||||
|
|
||||||
|
## Rules
|
||||||
|
1. Read every file listed before writing feedback
|
||||||
|
2. Check iron laws first — any violation is CRITICAL and sets status to "fail"
|
||||||
|
3. Then check: correctness, test coverage for new code, Go style conventions
|
||||||
|
4. Never rubber-stamp — if nothing is wrong, explain specifically which iron law checks you ran and why they passed
|
||||||
|
5. Line references are required for every finding — "roughly around the middle" is not acceptable
|
||||||
46
config/supervisor/spec.md
Normal file
46
config/supervisor/spec.md
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
# Spec Writing Discipline
|
||||||
|
|
||||||
|
You write structured implementation specs. Nothing is left ambiguous.
|
||||||
|
|
||||||
|
## Iron laws
|
||||||
|
1. Success criteria must be measurable — "the system is fast" is banned; "p99 < 200ms under 100 RPS" is valid
|
||||||
|
2. Always include an explicit "Out of scope" section — if you don't draw the boundary, the developer will guess wrong
|
||||||
|
3. Every technical decision in the approach must have a rationale
|
||||||
|
|
||||||
|
## Output contract
|
||||||
|
Return JSON result with:
|
||||||
|
- `status`: "pass" (spec written) or "error" (requirements too ambiguous to spec without more input)
|
||||||
|
- `phase`: "spec"
|
||||||
|
- `skill`: "spec"
|
||||||
|
- `file_path`: the output_path where the spec was written (absolute path)
|
||||||
|
- `runner_output`: ""
|
||||||
|
- `verified`: true if the file was written successfully
|
||||||
|
- `message`: "spec written: <one-line summary of what was specced>"
|
||||||
|
|
||||||
|
## Spec structure
|
||||||
|
Write the spec as markdown to the output_path:
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
# [Feature] Spec
|
||||||
|
|
||||||
|
## Problem statement
|
||||||
|
[What problem does this solve? For whom? Why now?]
|
||||||
|
|
||||||
|
## Success criteria
|
||||||
|
- [ ] [Criterion 1 — measurable and verifiable]
|
||||||
|
- [ ] [Criterion 2 — measurable and verifiable]
|
||||||
|
|
||||||
|
## Constraints
|
||||||
|
[Non-negotiable requirements the solution must satisfy]
|
||||||
|
|
||||||
|
## Out of scope
|
||||||
|
[What we are explicitly NOT doing in this iteration]
|
||||||
|
|
||||||
|
## Technical approach
|
||||||
|
[Architecture decisions, key components, rationale for each choice]
|
||||||
|
|
||||||
|
## Risks
|
||||||
|
[What could go wrong, and how we'd mitigate it]
|
||||||
|
```
|
||||||
|
|
||||||
|
If the requirements are too vague to produce measurable success criteria, return status "error" with a message listing the specific questions that need answers.
|
||||||
31
config/supervisor/trainer-reader.md
Normal file
31
config/supervisor/trainer-reader.md
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
# Trainer Reader Discipline
|
||||||
|
|
||||||
|
You scan session logs and identify candidate learning moments worth converting to training data.
|
||||||
|
|
||||||
|
## What to look for
|
||||||
|
- **SFT candidates**: the worker did exactly the right thing — a clean pattern worth reinforcing
|
||||||
|
- **DPO candidates**: the worker first produced a wrong or suboptimal response, then corrected — you have both rejected and chosen
|
||||||
|
|
||||||
|
## Scoring (1–5)
|
||||||
|
- 5: novel pattern, clearly correct, generalises across projects
|
||||||
|
- 4: good pattern, correct, somewhat project-specific but still useful
|
||||||
|
- 3: correct but obvious — include only if especially clean
|
||||||
|
- 2 or below: skip — too ambiguous or too context-specific
|
||||||
|
|
||||||
|
## Output contract
|
||||||
|
Return JSON result with:
|
||||||
|
- `status`: "pass" or "error"
|
||||||
|
- `phase`: "trainer"
|
||||||
|
- `skill`: "trainer"
|
||||||
|
- `file_path`: ""
|
||||||
|
- `runner_output`: a string containing a JSON array of candidates (valid JSON, no markdown fences), for example:
|
||||||
|
[{"type":"sft","moment":"<what happened>","prompt":"<what was asked>","completion":"<what was done right>","score":4},
|
||||||
|
{"type":"dpo","moment":"<what happened>","prompt":"<what was asked>","chosen":"<correct>","rejected":"<incorrect>","score":3}]
|
||||||
|
- `verified`: true
|
||||||
|
- `message`: "N sft candidates, M dpo candidates found"
|
||||||
|
|
||||||
|
## Rules
|
||||||
|
1. Read all session entries in the task prompt
|
||||||
|
2. Score each entry — only include entries scoring >= 3
|
||||||
|
3. Prompt/completion fields must be phrased to generalise: no project-specific paths or names
|
||||||
|
4. If no candidates score >= 3, return an empty array `[]` — never force low-quality candidates
|
||||||
35
config/supervisor/trainer-writer.md
Normal file
35
config/supervisor/trainer-writer.md
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# Trainer Writer Discipline
|
||||||
|
|
||||||
|
You receive candidate learning moments from the reader and write clean SFT/DPO training pairs.
|
||||||
|
|
||||||
|
## Quality gate (apply before writing)
|
||||||
|
- SFT: prompt must be phrased so it could come from any project, not just this one
|
||||||
|
- DPO: chosen and rejected must be clearly distinguishable — skip the pair if someone seeing only the prompt and both responses could not tell which one is better
|
||||||
|
- Never include project-specific paths, variable names, or identifiers in any pair
|
||||||
|
|
||||||
|
## Output contract
|
||||||
|
Return JSON result with:
|
||||||
|
- `status`: "pass" (pairs written or skipped due to quality) or "error" (candidates JSON was malformed)
|
||||||
|
- `phase`: "trainer"
|
||||||
|
- `skill`: "trainer"
|
||||||
|
- `file_path`: path of the last file written (empty if nothing passed quality gate)
|
||||||
|
- `runner_output`: "N SFT pairs written to brain/training-data/sft/, M DPO pairs to brain/training-data/dpo/" or "0 pairs passed quality gate"
|
||||||
|
- `verified`: true if files were written; false if nothing passed
|
||||||
|
- `message`: "N sft + M dpo pairs for session <id>" or "no pairs passed quality gate"
|
||||||
|
|
||||||
|
## File format
|
||||||
|
JSONL — one JSON object per line.
|
||||||
|
|
||||||
|
SFT: `{"prompt": "...", "completion": "..."}`
|
||||||
|
DPO: `{"prompt": "...", "chosen": "...", "rejected": "..."}`
|
||||||
|
|
||||||
|
Write SFT to: `<brain_dir>/training-data/sft/<session_id>.jsonl`
|
||||||
|
Write DPO to: `<brain_dir>/training-data/dpo/<session_id>.jsonl`
|
||||||
|
|
||||||
|
Append to existing files if they exist (don't overwrite).
|
||||||
|
|
||||||
|
## Rules
|
||||||
|
1. Parse the `reader_candidates` JSON from the task prompt
|
||||||
|
2. For each candidate: apply quality gate
|
||||||
|
3. Write passing SFT candidates to sft JSONL, DPO candidates to dpo JSONL
|
||||||
|
4. If nothing passes, return status "pass" with verified: false and message "no pairs passed quality gate"
|
||||||
@@ -33,6 +33,8 @@ type queryRequest struct {
|
|||||||
type writeRequest struct {
|
type writeRequest struct {
|
||||||
Content string `json:"content"`
|
Content string `json:"content"`
|
||||||
Filename string `json:"filename,omitempty"`
|
Filename string `json:"filename,omitempty"`
|
||||||
|
Type string `json:"type,omitempty"`
|
||||||
|
Domain string `json:"domain,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Query handles POST /query — full-text search across the brain wiki.
|
// Query handles POST /query — full-text search across the brain wiki.
|
||||||
@@ -83,8 +85,22 @@ func (h *Handler) Write(w http.ResponseWriter, r *http.Request) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
finalContent := req.Content
|
||||||
|
if req.Type != "" || req.Domain != "" {
|
||||||
|
var fm strings.Builder
|
||||||
|
fm.WriteString("---\n")
|
||||||
|
if req.Type != "" {
|
||||||
|
fmt.Fprintf(&fm, "type: %s\n", req.Type)
|
||||||
|
}
|
||||||
|
if req.Domain != "" {
|
||||||
|
fmt.Fprintf(&fm, "domain: %s\n", req.Domain)
|
||||||
|
}
|
||||||
|
fm.WriteString("---\n")
|
||||||
|
finalContent = fm.String() + req.Content
|
||||||
|
}
|
||||||
|
|
||||||
dest := filepath.Join(rawDir, filepath.Base(filename))
|
dest := filepath.Join(rawDir, filepath.Base(filename))
|
||||||
if err := os.WriteFile(dest, []byte(req.Content), 0o644); err != nil {
|
if err := os.WriteFile(dest, []byte(finalContent), 0o644); err != nil {
|
||||||
h.logger.Error("write failed", "err", err)
|
h.logger.Error("write failed", "err", err)
|
||||||
http.Error(w, "write error", http.StatusInternalServerError)
|
http.Error(w, "write error", http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -79,6 +79,27 @@ func TestQuery_RequiresQuery(t *testing.T) {
|
|||||||
assert.Equal(t, http.StatusBadRequest, rec.Code)
|
assert.Equal(t, http.StatusBadRequest, rec.Code)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestWrite_IncludesFrontmatterWhenTypeProvided(t *testing.T) {
|
||||||
|
dir, h := setup(t)
|
||||||
|
body, _ := json.Marshal(map[string]any{
|
||||||
|
"content": "Some learning.",
|
||||||
|
"filename": "typed-note.md",
|
||||||
|
"type": "concept",
|
||||||
|
"domain": "software",
|
||||||
|
})
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/write", bytes.NewReader(body))
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
|
||||||
|
h.Write(rec, req)
|
||||||
|
|
||||||
|
assert.Equal(t, http.StatusOK, rec.Code)
|
||||||
|
content, err := os.ReadFile(filepath.Join(dir, "raw", "typed-note.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, string(content), "type: concept")
|
||||||
|
assert.Contains(t, string(content), "domain: software")
|
||||||
|
assert.Contains(t, string(content), "Some learning.")
|
||||||
|
}
|
||||||
|
|
||||||
func TestWrite_GeneratesFilenameIfAbsent(t *testing.T) {
|
func TestWrite_GeneratesFilenameIfAbsent(t *testing.T) {
|
||||||
dir, h := setup(t)
|
dir, h := setup(t)
|
||||||
body, _ := json.Marshal(map[string]any{"content": "auto name"})
|
body, _ := json.Marshal(map[string]any{"content": "auto name"})
|
||||||
|
|||||||
@@ -8,6 +8,9 @@ type Config struct {
|
|||||||
LiteLLMAPIKey string // LITELLM_API_KEY
|
LiteLLMAPIKey string // LITELLM_API_KEY
|
||||||
ConfigDir string // SUPERVISOR_CONFIG_DIR, default ./config/supervisor
|
ConfigDir string // SUPERVISOR_CONFIG_DIR, default ./config/supervisor
|
||||||
ModelsFile string // SUPERVISOR_MODELS_FILE, default <ConfigDir>/../models.yaml
|
ModelsFile string // SUPERVISOR_MODELS_FILE, default <ConfigDir>/../models.yaml
|
||||||
|
IngestBaseURL string // INGEST_BASE_URL, default http://localhost:3300
|
||||||
|
SessionsDir string // SUPERVISOR_SESSIONS_DIR, default ./brain/sessions
|
||||||
|
BrainDir string // SUPERVISOR_BRAIN_DIR, default ./brain
|
||||||
}
|
}
|
||||||
|
|
||||||
func Load() (Config, error) {
|
func Load() (Config, error) {
|
||||||
@@ -18,6 +21,9 @@ func Load() (Config, error) {
|
|||||||
ConfigDir: envOr("SUPERVISOR_CONFIG_DIR", "./config/supervisor"),
|
ConfigDir: envOr("SUPERVISOR_CONFIG_DIR", "./config/supervisor"),
|
||||||
}
|
}
|
||||||
cfg.ModelsFile = envOr("SUPERVISOR_MODELS_FILE", cfg.ConfigDir+"/../models.yaml")
|
cfg.ModelsFile = envOr("SUPERVISOR_MODELS_FILE", cfg.ConfigDir+"/../models.yaml")
|
||||||
|
cfg.IngestBaseURL = envOr("INGEST_BASE_URL", "http://localhost:3300")
|
||||||
|
cfg.SessionsDir = envOr("SUPERVISOR_SESSIONS_DIR", "./brain/sessions")
|
||||||
|
cfg.BrainDir = envOr("SUPERVISOR_BRAIN_DIR", "./brain")
|
||||||
return cfg, nil
|
return cfg, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -13,12 +13,18 @@ func TestLoadDefaults(t *testing.T) {
|
|||||||
t.Setenv("LITELLM_BASE_URL", "")
|
t.Setenv("LITELLM_BASE_URL", "")
|
||||||
t.Setenv("LITELLM_API_KEY", "")
|
t.Setenv("LITELLM_API_KEY", "")
|
||||||
t.Setenv("SUPERVISOR_CONFIG_DIR", "")
|
t.Setenv("SUPERVISOR_CONFIG_DIR", "")
|
||||||
|
t.Setenv("INGEST_BASE_URL", "")
|
||||||
|
t.Setenv("SUPERVISOR_SESSIONS_DIR", "")
|
||||||
|
t.Setenv("SUPERVISOR_BRAIN_DIR", "")
|
||||||
|
|
||||||
cfg, err := config.Load()
|
cfg, err := config.Load()
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.Equal(t, "3200", cfg.Port)
|
assert.Equal(t, "3200", cfg.Port)
|
||||||
assert.Equal(t, "http://iguana:4000", cfg.LiteLLMBaseURL)
|
assert.Equal(t, "http://iguana:4000", cfg.LiteLLMBaseURL)
|
||||||
assert.Equal(t, "./config/supervisor", cfg.ConfigDir)
|
assert.Equal(t, "./config/supervisor", cfg.ConfigDir)
|
||||||
|
assert.Equal(t, "http://localhost:3300", cfg.IngestBaseURL)
|
||||||
|
assert.Equal(t, "./brain/sessions", cfg.SessionsDir)
|
||||||
|
assert.Equal(t, "./brain", cfg.BrainDir)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestLoadFromEnv(t *testing.T) {
|
func TestLoadFromEnv(t *testing.T) {
|
||||||
|
|||||||
@@ -68,11 +68,10 @@ func (e *Executor) Run(ctx context.Context, req Request) (Result, error) {
|
|||||||
|
|
||||||
args := []string{
|
args := []string{
|
||||||
"--print",
|
"--print",
|
||||||
"--bare",
|
|
||||||
"--permission-mode", "bypassPermissions",
|
"--permission-mode", "bypassPermissions",
|
||||||
"--tools", tools,
|
"--tools", tools,
|
||||||
"--json-schema", Schema,
|
"--json-schema", Schema,
|
||||||
"--output-format", "text",
|
"--output-format", "json",
|
||||||
prompt,
|
prompt,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -89,12 +88,21 @@ func (e *Executor) Run(ctx context.Context, req Request) (Result, error) {
|
|||||||
return Result{}, fmt.Errorf("claude exited with error: %w — stderr: %s", err, stderr.String())
|
return Result{}, fmt.Errorf("claude exited with error: %w — stderr: %s", err, stderr.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
var r Result
|
// --output-format json wraps the response in an envelope; structured output
|
||||||
if err := json.Unmarshal(stdout.Bytes(), &r); err != nil {
|
// from --json-schema is in the "structured_output" field.
|
||||||
return Result{}, fmt.Errorf("parse result JSON: %w — raw output: %s", err, stdout.String())
|
var envelope struct {
|
||||||
|
StructuredOutput *Result `json:"structured_output"`
|
||||||
|
IsError bool `json:"is_error"`
|
||||||
|
Result string `json:"result"` // fallback text result for error messages
|
||||||
}
|
}
|
||||||
if err := r.Validate(); err != nil {
|
if err := json.Unmarshal(stdout.Bytes(), &envelope); err != nil {
|
||||||
|
return Result{}, fmt.Errorf("parse envelope JSON: %w — raw: %s — stderr: %s", err, stdout.String(), stderr.String())
|
||||||
|
}
|
||||||
|
if envelope.StructuredOutput == nil {
|
||||||
|
return Result{}, fmt.Errorf("no structured_output in response — result: %s — stderr: %s", envelope.Result, stderr.String())
|
||||||
|
}
|
||||||
|
if err := envelope.StructuredOutput.Validate(); err != nil {
|
||||||
return Result{}, fmt.Errorf("invalid result: %w", err)
|
return Result{}, fmt.Errorf("invalid result: %w", err)
|
||||||
}
|
}
|
||||||
return r, nil
|
return *envelope.StructuredOutput, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -28,8 +28,10 @@ func fakeClaudePath(t *testing.T, output string, exitCode int) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestExecutorParsesValidResult(t *testing.T) {
|
func TestExecutorParsesValidResult(t *testing.T) {
|
||||||
validJSON := `{"status":"pass","phase":"red","skill":"tdd","file_path":"/tmp/x_test.go","runner_output":"FAIL","verified":true,"model_used":"self","message":"ok"}`
|
// Fake claude emits the --output-format json envelope that the real CLI produces.
|
||||||
claude := fakeClaudePath(t, validJSON, 0)
|
// The executor extracts the result from the "structured_output" field.
|
||||||
|
envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"red","skill":"tdd","file_path":"/tmp/x_test.go","runner_output":"FAIL","verified":true,"model_used":"self","message":"ok"}}`
|
||||||
|
claude := fakeClaudePath(t, envelope, 0)
|
||||||
|
|
||||||
ex := iexec.New(iexec.Config{
|
ex := iexec.New(iexec.Config{
|
||||||
ClaudeBinary: claude,
|
ClaudeBinary: claude,
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import (
|
|||||||
// validates its own output before returning.
|
// validates its own output before returning.
|
||||||
type Result struct {
|
type Result struct {
|
||||||
Status string `json:"status"` // pass | fail | error
|
Status string `json:"status"` // pass | fail | error
|
||||||
Phase string `json:"phase"` // red | green | refactor
|
Phase string `json:"phase"` // red | green | refactor | retrospective | review | debug | spec | trainer
|
||||||
Skill string `json:"skill"` // tdd | review | ...
|
Skill string `json:"skill"` // tdd | review | ...
|
||||||
FilePath string `json:"file_path"` // absolute path to generated file
|
FilePath string `json:"file_path"` // absolute path to generated file
|
||||||
RunnerOutput string `json:"runner_output"` // raw stdout+stderr from test runner
|
RunnerOutput string `json:"runner_output"` // raw stdout+stderr from test runner
|
||||||
@@ -25,6 +25,10 @@ var validPhases = map[string]bool{
|
|||||||
"green": true,
|
"green": true,
|
||||||
"refactor": true,
|
"refactor": true,
|
||||||
"retrospective": true,
|
"retrospective": true,
|
||||||
|
"review": true,
|
||||||
|
"debug": true,
|
||||||
|
"spec": true,
|
||||||
|
"trainer": true,
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r Result) Validate() error {
|
func (r Result) Validate() error {
|
||||||
@@ -33,7 +37,7 @@ func (r Result) Validate() error {
|
|||||||
errs = append(errs, "status must be pass|fail|error, got: "+r.Status)
|
errs = append(errs, "status must be pass|fail|error, got: "+r.Status)
|
||||||
}
|
}
|
||||||
if !validPhases[r.Phase] {
|
if !validPhases[r.Phase] {
|
||||||
errs = append(errs, "phase must be red|green|refactor, got: "+r.Phase)
|
errs = append(errs, "phase must be one of red|green|refactor|retrospective|review|debug|spec|trainer, got: "+r.Phase)
|
||||||
}
|
}
|
||||||
if r.Skill == "" {
|
if r.Skill == "" {
|
||||||
errs = append(errs, "skill is required")
|
errs = append(errs, "skill is required")
|
||||||
@@ -50,7 +54,7 @@ const Schema = `{
|
|||||||
"required": ["status","phase","skill","file_path","runner_output","verified","model_used","message"],
|
"required": ["status","phase","skill","file_path","runner_output","verified","model_used","message"],
|
||||||
"properties": {
|
"properties": {
|
||||||
"status": {"type": "string", "enum": ["pass","fail","error"]},
|
"status": {"type": "string", "enum": ["pass","fail","error"]},
|
||||||
"phase": {"type": "string", "enum": ["red","green","refactor"]},
|
"phase": {"type": "string"},
|
||||||
"skill": {"type": "string"},
|
"skill": {"type": "string"},
|
||||||
"file_path": {"type": "string"},
|
"file_path": {"type": "string"},
|
||||||
"runner_output": {"type": "string"},
|
"runner_output": {"type": "string"},
|
||||||
|
|||||||
@@ -69,3 +69,11 @@ func TestResultValidation(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestValidateAcceptsAllPhases(t *testing.T) {
|
||||||
|
phases := []string{"red", "green", "refactor", "retrospective", "review", "debug", "spec", "trainer"}
|
||||||
|
for _, phase := range phases {
|
||||||
|
r := exec.Result{Status: "pass", Phase: phase, Skill: "test", ModelUsed: "self", Message: "ok"}
|
||||||
|
assert.NoError(t, r.Validate(), "phase %q should be valid", phase)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -80,7 +80,7 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
json.NewEncoder(w).Encode(response{
|
_ = json.NewEncoder(w).Encode(response{
|
||||||
JSONRPC: "2.0",
|
JSONRPC: "2.0",
|
||||||
ID: req.ID,
|
ID: req.ID,
|
||||||
Result: result,
|
Result: result,
|
||||||
@@ -90,7 +90,7 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
|||||||
|
|
||||||
func writeError(w http.ResponseWriter, id any, code int, msg string) {
|
func writeError(w http.ResponseWriter, id any, code int, msg string) {
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
json.NewEncoder(w).Encode(response{
|
_ = json.NewEncoder(w).Encode(response{
|
||||||
JSONRPC: "2.0",
|
JSONRPC: "2.0",
|
||||||
ID: id,
|
ID: id,
|
||||||
Error: &rpcError{Code: code, Message: msg},
|
Error: &rpcError{Code: code, Message: msg},
|
||||||
|
|||||||
38
internal/session/history.go
Normal file
38
internal/session/history.go
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
// internal/session/history.go
|
||||||
|
package session
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FormatHistory formats prior session entries as a structured block for
|
||||||
|
// injection into a worker task prompt. Entries matching excludePhase are
|
||||||
|
// omitted (pass the current phase to avoid circular injection).
|
||||||
|
func FormatHistory(entries []Entry, excludePhase string) string {
|
||||||
|
var filtered []Entry
|
||||||
|
for _, e := range entries {
|
||||||
|
if e.Phase != excludePhase {
|
||||||
|
filtered = append(filtered, e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(filtered) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
var b strings.Builder
|
||||||
|
b.WriteString("## Session history\n\n")
|
||||||
|
for _, e := range filtered {
|
||||||
|
fmt.Fprintf(&b, "### Phase: %s\n", e.Phase) //nolint:errcheck // strings.Builder never errors
|
||||||
|
fmt.Fprintf(&b, "- Skill: %s\n", e.Skill) //nolint:errcheck
|
||||||
|
fmt.Fprintf(&b, "- Status: %s\n", e.FinalStatus) //nolint:errcheck
|
||||||
|
if e.FilePath != "" {
|
||||||
|
fmt.Fprintf(&b, "- File: %s\n", e.FilePath) //nolint:errcheck
|
||||||
|
}
|
||||||
|
if e.Message != "" {
|
||||||
|
fmt.Fprintf(&b, "- Summary: %s\n", e.Message) //nolint:errcheck
|
||||||
|
}
|
||||||
|
b.WriteString("\n")
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
41
internal/session/history_test.go
Normal file
41
internal/session/history_test.go
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
// internal/session/history_test.go
|
||||||
|
package session_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/supervisor/internal/session"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFormatHistoryEmpty(t *testing.T) {
|
||||||
|
result := session.FormatHistory(nil, "")
|
||||||
|
assert.Equal(t, "", result)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFormatHistoryFormatsEntries(t *testing.T) {
|
||||||
|
entries := []session.Entry{
|
||||||
|
{
|
||||||
|
Skill: "tdd", Phase: "red", FinalStatus: "pass",
|
||||||
|
FilePath: "internal/foo/foo_test.go",
|
||||||
|
Message: "wrote failing test for Foo",
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
result := session.FormatHistory(entries, "")
|
||||||
|
assert.Contains(t, result, "## Session history")
|
||||||
|
assert.Contains(t, result, "Phase: red")
|
||||||
|
assert.Contains(t, result, "wrote failing test for Foo")
|
||||||
|
assert.Contains(t, result, "internal/foo/foo_test.go")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFormatHistoryExcludesCurrentPhase(t *testing.T) {
|
||||||
|
entries := []session.Entry{
|
||||||
|
{Skill: "tdd", Phase: "red", Message: "red done", FinalStatus: "pass"},
|
||||||
|
{Skill: "tdd", Phase: "green", Message: "green done", FinalStatus: "pass"},
|
||||||
|
}
|
||||||
|
result := session.FormatHistory(entries, "green")
|
||||||
|
assert.Contains(t, result, "red done")
|
||||||
|
assert.NotContains(t, result, "green done")
|
||||||
|
}
|
||||||
@@ -73,7 +73,7 @@ func Read(sessionsDir, sessionID string) ([]Entry, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("open session log: %w", err)
|
return nil, fmt.Errorf("open session log: %w", err)
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close() //nolint:errcheck
|
||||||
|
|
||||||
var entries []Entry
|
var entries []Entry
|
||||||
scanner := bufio.NewScanner(f)
|
scanner := bufio.NewScanner(f)
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ func TestHandle_BrainQuery_CallsIngestServer(t *testing.T) {
|
|||||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
assert.Equal(t, "/query", r.URL.Path)
|
assert.Equal(t, "/query", r.URL.Path)
|
||||||
called = true
|
called = true
|
||||||
json.NewEncoder(w).Encode(map[string]any{
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
"results": []map[string]any{
|
"results": []map[string]any{
|
||||||
{"path": "wiki/concepts/tdd.md", "title": "TDD", "excerpt": "Test-driven development.", "score": 3},
|
{"path": "wiki/concepts/tdd.md", "title": "TDD", "excerpt": "Test-driven development.", "score": 3},
|
||||||
},
|
},
|
||||||
@@ -45,7 +45,7 @@ func TestHandle_BrainWrite_CallsIngestServer(t *testing.T) {
|
|||||||
require.NoError(t, json.NewDecoder(r.Body).Decode(&body))
|
require.NoError(t, json.NewDecoder(r.Body).Decode(&body))
|
||||||
assert.Equal(t, "concept", body["type"])
|
assert.Equal(t, "concept", body["type"])
|
||||||
assert.Equal(t, "# Test\n\nSome learning.", body["content"])
|
assert.Equal(t, "# Test\n\nSome learning.", body["content"])
|
||||||
json.NewEncoder(w).Encode(map[string]string{"path": "raw/test.md"})
|
_ = json.NewEncoder(w).Encode(map[string]string{"path": "raw/test.md"})
|
||||||
}))
|
}))
|
||||||
defer srv.Close()
|
defer srv.Close()
|
||||||
|
|
||||||
|
|||||||
80
internal/skills/debug/handlers.go
Normal file
80
internal/skills/debug/handlers.go
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
// internal/skills/debug/handlers.go
|
||||||
|
package debug
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/session"
|
||||||
|
)
|
||||||
|
|
||||||
|
// debugArgs is the JSON argument payload accepted by the "debug" tool.
type debugArgs struct {
	// ProjectRoot is required; Handle rejects the call when it is empty.
	ProjectRoot string `json:"project_root"`
	// Error is the error text to analyse. Required.
	Error string `json:"error"`
	// Context is optional free-form text copied into the task prompt.
	Context string `json:"context"`
	// Model is optional; when empty the skill's DefaultModel is used.
	Model string `json:"model"`
	// SessionID is optional; when empty no session history is prepended.
	SessionID string `json:"session_id"`
}
|
||||||
|
|
||||||
|
// Handle dispatches the MCP tool call to the appropriate handler.
|
||||||
|
func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) {
|
||||||
|
if tool != "debug" {
|
||||||
|
return nil, fmt.Errorf("unknown tool: %s", tool)
|
||||||
|
}
|
||||||
|
var a debugArgs
|
||||||
|
if err := json.Unmarshal(args, &a); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse args: %w", err)
|
||||||
|
}
|
||||||
|
if a.ProjectRoot == "" {
|
||||||
|
return nil, fmt.Errorf("project_root is required")
|
||||||
|
}
|
||||||
|
if a.Error == "" {
|
||||||
|
return nil, fmt.Errorf("error is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
model := a.Model
|
||||||
|
if model == "" {
|
||||||
|
model = s.cfg.DefaultModel
|
||||||
|
}
|
||||||
|
|
||||||
|
task := fmt.Sprintf(
|
||||||
|
"phase: debug\nproject_root: %s\nerror: %s\ncontext: %s\nmodel: %s",
|
||||||
|
a.ProjectRoot, a.Error, a.Context, model,
|
||||||
|
)
|
||||||
|
task = s.prependHistory(a.SessionID, "debug", task)
|
||||||
|
|
||||||
|
if s.cfg.ExecutorFn == nil {
|
||||||
|
return nil, fmt.Errorf("no executor configured")
|
||||||
|
}
|
||||||
|
result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
||||||
|
SkillPrompt: s.cfg.SkillPrompt,
|
||||||
|
TaskPrompt: task,
|
||||||
|
Model: model,
|
||||||
|
Tools: "Read,Bash",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
b, err := json.Marshal(result)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("marshal result: %w", err)
|
||||||
|
}
|
||||||
|
return b, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
|
||||||
|
if sessionID == "" || s.cfg.SessionsDir == "" {
|
||||||
|
return task
|
||||||
|
}
|
||||||
|
entries, err := session.Read(s.cfg.SessionsDir, sessionID)
|
||||||
|
if err != nil || len(entries) == 0 {
|
||||||
|
return task
|
||||||
|
}
|
||||||
|
history := session.FormatHistory(entries, currentPhase)
|
||||||
|
if history == "" {
|
||||||
|
return task
|
||||||
|
}
|
||||||
|
return history + "\n---\n\n" + task
|
||||||
|
}
|
||||||
61
internal/skills/debug/handlers_test.go
Normal file
61
internal/skills/debug/handlers_test.go
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
// internal/skills/debug/handlers_test.go
|
||||||
|
package debug_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/skills/debug"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestDebugToolRegistered(t *testing.T) {
|
||||||
|
sk := debug.New(debug.Config{SkillPrompt: "debug rules"})
|
||||||
|
names := make([]string, 0)
|
||||||
|
for _, tool := range sk.Tools() {
|
||||||
|
names = append(names, tool.Name)
|
||||||
|
}
|
||||||
|
assert.Contains(t, names, "debug")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDebugRequiresProjectRoot(t *testing.T) {
|
||||||
|
sk := debug.New(debug.Config{SkillPrompt: "d"})
|
||||||
|
_, err := sk.Handle(context.Background(), "debug", json.RawMessage(`{"error":"panic: nil pointer"}`))
|
||||||
|
assert.ErrorContains(t, err, "project_root")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDebugRequiresError(t *testing.T) {
|
||||||
|
sk := debug.New(debug.Config{SkillPrompt: "d"})
|
||||||
|
_, err := sk.Handle(context.Background(), "debug", json.RawMessage(`{"project_root":"/tmp"}`))
|
||||||
|
assert.ErrorContains(t, err, "error")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDebugCallsExecutor(t *testing.T) {
|
||||||
|
called := false
|
||||||
|
var capturedTask string
|
||||||
|
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
||||||
|
called = true
|
||||||
|
capturedTask = req.TaskPrompt
|
||||||
|
return iexec.Result{
|
||||||
|
Status: "pass", Phase: "debug", Skill: "debug",
|
||||||
|
RunnerOutput: "HYPOTHESIS 1 (likelihood: high): nil map access\nVERIFY: go test ./... → expected: panic line reference",
|
||||||
|
Verified: false, ModelUsed: "self", Message: "3 hypotheses for: panic nil pointer at foo.go:42",
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
sk := debug.New(debug.Config{SkillPrompt: "debug rules", ExecutorFn: fakeFn, SessionsDir: t.TempDir()})
|
||||||
|
out, err := sk.Handle(context.Background(), "debug", json.RawMessage(
|
||||||
|
`{"project_root":"/tmp/proj","error":"panic: nil pointer dereference at foo.go:42","context":"occurs on startup"}`,
|
||||||
|
))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.True(t, called)
|
||||||
|
assert.Contains(t, capturedTask, "panic: nil pointer dereference")
|
||||||
|
assert.Contains(t, capturedTask, "occurs on startup")
|
||||||
|
|
||||||
|
var result iexec.Result
|
||||||
|
require.NoError(t, json.Unmarshal(out, &result))
|
||||||
|
assert.Equal(t, "debug", result.Phase)
|
||||||
|
}
|
||||||
55
internal/skills/debug/skill.go
Normal file
55
internal/skills/debug/skill.go
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
// internal/skills/debug/skill.go
|
||||||
|
package debug
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/registry"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ExecutorFn is the function signature for running a worker subprocess.
|
||||||
|
type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
|
||||||
|
|
||||||
|
// Config holds dependencies for the debug skill.
|
||||||
|
type Config struct {
|
||||||
|
SkillPrompt string
|
||||||
|
DefaultModel string
|
||||||
|
ExecutorFn ExecutorFn
|
||||||
|
SessionsDir string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skill implements the debug MCP tool.
|
||||||
|
type Skill struct{ cfg Config }
|
||||||
|
|
||||||
|
// New creates a new debug Skill.
|
||||||
|
func New(cfg Config) *Skill { return &Skill{cfg: cfg} }
|
||||||
|
|
||||||
|
// Name returns the skill identifier.
|
||||||
|
func (s *Skill) Name() string { return "debug" }
|
||||||
|
|
||||||
|
// Tools returns the MCP tool definitions for this skill.
|
||||||
|
func (s *Skill) Tools() []registry.ToolDef {
|
||||||
|
schema := func(required []string, props map[string]any) json.RawMessage {
|
||||||
|
b, _ := json.Marshal(map[string]any{"type": "object", "required": required, "properties": props})
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
str := map[string]any{"type": "string"}
|
||||||
|
return []registry.ToolDef{
|
||||||
|
{
|
||||||
|
Name: "debug",
|
||||||
|
Description: "Analyse an error and return 3-5 hypotheses ordered by likelihood, each with a concrete verification step.",
|
||||||
|
InputSchema: schema(
|
||||||
|
[]string{"project_root", "error"},
|
||||||
|
map[string]any{
|
||||||
|
"project_root": str,
|
||||||
|
"error": str,
|
||||||
|
"context": str,
|
||||||
|
"model": str,
|
||||||
|
"session_id": str,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
81
internal/skills/review/handlers.go
Normal file
81
internal/skills/review/handlers.go
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
// internal/skills/review/handlers.go
|
||||||
|
package review
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/session"
|
||||||
|
)
|
||||||
|
|
||||||
|
// reviewArgs is the JSON argument payload accepted by the "review" tool.
type reviewArgs struct {
	// ProjectRoot is required; Handle rejects the call when it is empty.
	ProjectRoot string `json:"project_root"`
	// Files lists the files to review. At least one entry is required; the
	// entries are joined with ", " in the task prompt.
	Files []string `json:"files"`
	// Context is optional free-form text copied into the task prompt.
	Context string `json:"context"`
	// Model is optional; when empty the skill's DefaultModel is used.
	Model string `json:"model"`
	// SessionID is optional; when empty no session history is prepended.
	SessionID string `json:"session_id"`
}
|
||||||
|
|
||||||
|
// Handle dispatches the MCP tool call to the appropriate handler.
|
||||||
|
func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) {
|
||||||
|
if tool != "review" {
|
||||||
|
return nil, fmt.Errorf("unknown tool: %s", tool)
|
||||||
|
}
|
||||||
|
var a reviewArgs
|
||||||
|
if err := json.Unmarshal(args, &a); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse args: %w", err)
|
||||||
|
}
|
||||||
|
if a.ProjectRoot == "" {
|
||||||
|
return nil, fmt.Errorf("project_root is required")
|
||||||
|
}
|
||||||
|
if len(a.Files) == 0 {
|
||||||
|
return nil, fmt.Errorf("files is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
model := a.Model
|
||||||
|
if model == "" {
|
||||||
|
model = s.cfg.DefaultModel
|
||||||
|
}
|
||||||
|
|
||||||
|
task := fmt.Sprintf(
|
||||||
|
"phase: review\nproject_root: %s\nfiles: %s\ncontext: %s\nmodel: %s",
|
||||||
|
a.ProjectRoot, strings.Join(a.Files, ", "), a.Context, model,
|
||||||
|
)
|
||||||
|
task = s.prependHistory(a.SessionID, "review", task)
|
||||||
|
|
||||||
|
if s.cfg.ExecutorFn == nil {
|
||||||
|
return nil, fmt.Errorf("no executor configured")
|
||||||
|
}
|
||||||
|
result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
||||||
|
SkillPrompt: s.cfg.SkillPrompt,
|
||||||
|
TaskPrompt: task,
|
||||||
|
Model: model,
|
||||||
|
Tools: "Read,Bash",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
b, err := json.Marshal(result)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("marshal result: %w", err)
|
||||||
|
}
|
||||||
|
return b, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
|
||||||
|
if sessionID == "" || s.cfg.SessionsDir == "" {
|
||||||
|
return task
|
||||||
|
}
|
||||||
|
entries, err := session.Read(s.cfg.SessionsDir, sessionID)
|
||||||
|
if err != nil || len(entries) == 0 {
|
||||||
|
return task
|
||||||
|
}
|
||||||
|
history := session.FormatHistory(entries, currentPhase)
|
||||||
|
if history == "" {
|
||||||
|
return task
|
||||||
|
}
|
||||||
|
return history + "\n---\n\n" + task
|
||||||
|
}
|
||||||
61
internal/skills/review/handlers_test.go
Normal file
61
internal/skills/review/handlers_test.go
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
// internal/skills/review/handlers_test.go
|
||||||
|
package review_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/skills/review"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestReviewToolRegistered(t *testing.T) {
|
||||||
|
sk := review.New(review.Config{SkillPrompt: "review rules"})
|
||||||
|
names := make([]string, 0)
|
||||||
|
for _, tool := range sk.Tools() {
|
||||||
|
names = append(names, tool.Name)
|
||||||
|
}
|
||||||
|
assert.Contains(t, names, "review")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReviewRequiresProjectRoot(t *testing.T) {
|
||||||
|
sk := review.New(review.Config{SkillPrompt: "r"})
|
||||||
|
_, err := sk.Handle(context.Background(), "review", json.RawMessage(`{"files":["main.go"]}`))
|
||||||
|
assert.ErrorContains(t, err, "project_root")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReviewRequiresFiles(t *testing.T) {
|
||||||
|
sk := review.New(review.Config{SkillPrompt: "r"})
|
||||||
|
_, err := sk.Handle(context.Background(), "review", json.RawMessage(`{"project_root":"/tmp"}`))
|
||||||
|
assert.ErrorContains(t, err, "files")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReviewCallsExecutor(t *testing.T) {
|
||||||
|
called := false
|
||||||
|
var capturedTask string
|
||||||
|
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
||||||
|
called = true
|
||||||
|
capturedTask = req.TaskPrompt
|
||||||
|
return iexec.Result{
|
||||||
|
Status: "pass", Phase: "review", Skill: "review",
|
||||||
|
Verified: true, ModelUsed: "self", Message: "2 warnings found",
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
sk := review.New(review.Config{SkillPrompt: "review rules", ExecutorFn: fakeFn, SessionsDir: t.TempDir()})
|
||||||
|
out, err := sk.Handle(context.Background(), "review", json.RawMessage(
|
||||||
|
`{"project_root":"/tmp/proj","files":["internal/foo/foo.go"],"context":"PR: add Foo helper"}`,
|
||||||
|
))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.True(t, called)
|
||||||
|
assert.Contains(t, capturedTask, "internal/foo/foo.go")
|
||||||
|
assert.Contains(t, capturedTask, "PR: add Foo helper")
|
||||||
|
|
||||||
|
var result iexec.Result
|
||||||
|
require.NoError(t, json.Unmarshal(out, &result))
|
||||||
|
assert.Equal(t, "pass", result.Status)
|
||||||
|
assert.Equal(t, "review", result.Phase)
|
||||||
|
}
|
||||||
55
internal/skills/review/skill.go
Normal file
55
internal/skills/review/skill.go
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
// internal/skills/review/skill.go
|
||||||
|
package review
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/registry"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ExecutorFn is the function signature for running a worker subprocess.
|
||||||
|
type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
|
||||||
|
|
||||||
|
// Config holds dependencies for the review skill.
|
||||||
|
type Config struct {
|
||||||
|
SkillPrompt string
|
||||||
|
DefaultModel string
|
||||||
|
ExecutorFn ExecutorFn
|
||||||
|
SessionsDir string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skill implements the review MCP tool.
|
||||||
|
type Skill struct{ cfg Config }
|
||||||
|
|
||||||
|
// New creates a new review Skill.
|
||||||
|
func New(cfg Config) *Skill { return &Skill{cfg: cfg} }
|
||||||
|
|
||||||
|
// Name returns the skill identifier.
|
||||||
|
func (s *Skill) Name() string { return "review" }
|
||||||
|
|
||||||
|
// Tools returns the MCP tool definitions for this skill.
|
||||||
|
func (s *Skill) Tools() []registry.ToolDef {
|
||||||
|
schema := func(required []string, props map[string]any) json.RawMessage {
|
||||||
|
b, _ := json.Marshal(map[string]any{"type": "object", "required": required, "properties": props})
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
str := map[string]any{"type": "string"}
|
||||||
|
return []registry.ToolDef{
|
||||||
|
{
|
||||||
|
Name: "review",
|
||||||
|
Description: "Perform a structured code review of the specified files. Returns findings with severity levels.",
|
||||||
|
InputSchema: schema(
|
||||||
|
[]string{"project_root", "files"},
|
||||||
|
map[string]any{
|
||||||
|
"project_root": str,
|
||||||
|
"files": map[string]any{"type": "array", "items": map[string]any{"type": "string"}},
|
||||||
|
"context": str,
|
||||||
|
"model": str,
|
||||||
|
"session_id": str,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
85
internal/skills/spec/handlers.go
Normal file
85
internal/skills/spec/handlers.go
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
// internal/skills/spec/handlers.go
|
||||||
|
package spec
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/session"
|
||||||
|
)
|
||||||
|
|
||||||
|
// specArgs is the decoded JSON argument object of the "spec" tool.
type specArgs struct {
	// ProjectRoot is read from the "project_root" key.
	ProjectRoot string `json:"project_root"`
	// Requirements is read from the "requirements" key.
	Requirements string `json:"requirements"`
	// OutputPath is read from the "output_path" key.
	OutputPath string `json:"output_path"`
	// Context is read from the "context" key.
	Context string `json:"context"`
	// Model is read from the "model" key.
	Model string `json:"model"`
	// SessionID is read from the "session_id" key.
	SessionID string `json:"session_id"`
}
|
||||||
|
|
||||||
|
// Handle dispatches the MCP tool call to the appropriate handler.
|
||||||
|
func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) {
|
||||||
|
if tool != "spec" {
|
||||||
|
return nil, fmt.Errorf("unknown tool: %s", tool)
|
||||||
|
}
|
||||||
|
var a specArgs
|
||||||
|
if err := json.Unmarshal(args, &a); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse args: %w", err)
|
||||||
|
}
|
||||||
|
if a.ProjectRoot == "" {
|
||||||
|
return nil, fmt.Errorf("project_root is required")
|
||||||
|
}
|
||||||
|
if a.Requirements == "" {
|
||||||
|
return nil, fmt.Errorf("requirements is required")
|
||||||
|
}
|
||||||
|
outputPath := a.OutputPath
|
||||||
|
if outputPath == "" {
|
||||||
|
outputPath = "docs/spec.md"
|
||||||
|
}
|
||||||
|
|
||||||
|
model := a.Model
|
||||||
|
if model == "" {
|
||||||
|
model = s.cfg.DefaultModel
|
||||||
|
}
|
||||||
|
|
||||||
|
task := fmt.Sprintf(
|
||||||
|
"phase: spec\nproject_root: %s\nrequirements: %s\noutput_path: %s\ncontext: %s\nmodel: %s",
|
||||||
|
a.ProjectRoot, a.Requirements, outputPath, a.Context, model,
|
||||||
|
)
|
||||||
|
task = s.prependHistory(a.SessionID, "spec", task)
|
||||||
|
|
||||||
|
if s.cfg.ExecutorFn == nil {
|
||||||
|
return nil, fmt.Errorf("no executor configured")
|
||||||
|
}
|
||||||
|
result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
||||||
|
SkillPrompt: s.cfg.SkillPrompt,
|
||||||
|
TaskPrompt: task,
|
||||||
|
Model: model,
|
||||||
|
Tools: "Read,Write",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
b, err := json.Marshal(result)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("marshal result: %w", err)
|
||||||
|
}
|
||||||
|
return b, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
|
||||||
|
if sessionID == "" || s.cfg.SessionsDir == "" {
|
||||||
|
return task
|
||||||
|
}
|
||||||
|
entries, err := session.Read(s.cfg.SessionsDir, sessionID)
|
||||||
|
if err != nil || len(entries) == 0 {
|
||||||
|
return task
|
||||||
|
}
|
||||||
|
history := session.FormatHistory(entries, currentPhase)
|
||||||
|
if history == "" {
|
||||||
|
return task
|
||||||
|
}
|
||||||
|
return history + "\n---\n\n" + task
|
||||||
|
}
|
||||||
61
internal/skills/spec/handlers_test.go
Normal file
61
internal/skills/spec/handlers_test.go
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
// internal/skills/spec/handlers_test.go
|
||||||
|
package spec_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/skills/spec"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSpecToolRegistered(t *testing.T) {
|
||||||
|
sk := spec.New(spec.Config{SkillPrompt: "spec rules"})
|
||||||
|
names := make([]string, 0)
|
||||||
|
for _, tool := range sk.Tools() {
|
||||||
|
names = append(names, tool.Name)
|
||||||
|
}
|
||||||
|
assert.Contains(t, names, "spec")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSpecRequiresProjectRoot(t *testing.T) {
|
||||||
|
sk := spec.New(spec.Config{SkillPrompt: "s"})
|
||||||
|
_, err := sk.Handle(context.Background(), "spec", json.RawMessage(`{"requirements":"add login"}`))
|
||||||
|
assert.ErrorContains(t, err, "project_root")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSpecRequiresRequirements(t *testing.T) {
|
||||||
|
sk := spec.New(spec.Config{SkillPrompt: "s"})
|
||||||
|
_, err := sk.Handle(context.Background(), "spec", json.RawMessage(`{"project_root":"/tmp"}`))
|
||||||
|
assert.ErrorContains(t, err, "requirements")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSpecCallsExecutor(t *testing.T) {
|
||||||
|
called := false
|
||||||
|
var capturedTask string
|
||||||
|
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
||||||
|
called = true
|
||||||
|
capturedTask = req.TaskPrompt
|
||||||
|
return iexec.Result{
|
||||||
|
Status: "pass", Phase: "spec", Skill: "spec",
|
||||||
|
FilePath: "/tmp/proj/docs/login-spec.md",
|
||||||
|
Verified: true, ModelUsed: "self", Message: "spec written: login feature",
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
sk := spec.New(spec.Config{SkillPrompt: "spec rules", ExecutorFn: fakeFn, SessionsDir: t.TempDir()})
|
||||||
|
out, err := sk.Handle(context.Background(), "spec", json.RawMessage(
|
||||||
|
`{"project_root":"/tmp/proj","requirements":"add OAuth2 login","output_path":"docs/login-spec.md"}`,
|
||||||
|
))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.True(t, called)
|
||||||
|
assert.Contains(t, capturedTask, "OAuth2 login")
|
||||||
|
assert.Contains(t, capturedTask, "docs/login-spec.md")
|
||||||
|
|
||||||
|
var result iexec.Result
|
||||||
|
require.NoError(t, json.Unmarshal(out, &result))
|
||||||
|
assert.Equal(t, "spec", result.Phase)
|
||||||
|
}
|
||||||
56
internal/skills/spec/skill.go
Normal file
56
internal/skills/spec/skill.go
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
// internal/skills/spec/skill.go
|
||||||
|
package spec
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/registry"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ExecutorFn is the function signature for running a worker subprocess.
|
||||||
|
type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
|
||||||
|
|
||||||
|
// Config holds dependencies for the spec skill.
|
||||||
|
type Config struct {
|
||||||
|
SkillPrompt string
|
||||||
|
DefaultModel string
|
||||||
|
ExecutorFn ExecutorFn
|
||||||
|
SessionsDir string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skill implements the spec MCP tool.
|
||||||
|
type Skill struct{ cfg Config }
|
||||||
|
|
||||||
|
// New creates a new spec Skill.
|
||||||
|
func New(cfg Config) *Skill { return &Skill{cfg: cfg} }
|
||||||
|
|
||||||
|
// Name returns the skill identifier.
|
||||||
|
func (s *Skill) Name() string { return "spec" }
|
||||||
|
|
||||||
|
// Tools returns the MCP tool definitions for this skill.
|
||||||
|
func (s *Skill) Tools() []registry.ToolDef {
|
||||||
|
schema := func(required []string, props map[string]any) json.RawMessage {
|
||||||
|
b, _ := json.Marshal(map[string]any{"type": "object", "required": required, "properties": props})
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
str := map[string]any{"type": "string"}
|
||||||
|
return []registry.ToolDef{
|
||||||
|
{
|
||||||
|
Name: "spec",
|
||||||
|
Description: "Generate a structured implementation spec from requirements. Writes the spec to output_path in the project.",
|
||||||
|
InputSchema: schema(
|
||||||
|
[]string{"project_root", "requirements"},
|
||||||
|
map[string]any{
|
||||||
|
"project_root": str,
|
||||||
|
"requirements": str,
|
||||||
|
"output_path": str,
|
||||||
|
"context": str,
|
||||||
|
"model": str,
|
||||||
|
"session_id": str,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/session"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) {
|
func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) {
|
||||||
@@ -51,6 +52,7 @@ type greenArgs struct {
|
|||||||
TestPath string `json:"test_path"`
|
TestPath string `json:"test_path"`
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
TestCmd string `json:"test_cmd"`
|
TestCmd string `json:"test_cmd"`
|
||||||
|
SessionID string `json:"session_id"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Skill) handleGreen(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) {
|
func (s *Skill) handleGreen(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) {
|
||||||
@@ -68,6 +70,7 @@ func (s *Skill) handleGreen(ctx context.Context, raw json.RawMessage) (json.RawM
|
|||||||
"phase: green\nproject_root: %s\ntest_path: %s\nmodel: %s\ntest_cmd: %s",
|
"phase: green\nproject_root: %s\ntest_path: %s\nmodel: %s\ntest_cmd: %s",
|
||||||
args.ProjectRoot, args.TestPath, s.resolveModel(args.Model), args.TestCmd,
|
args.ProjectRoot, args.TestPath, s.resolveModel(args.Model), args.TestCmd,
|
||||||
)
|
)
|
||||||
|
task = s.prependHistory(args.SessionID, "green", task)
|
||||||
return s.execute(ctx, task)
|
return s.execute(ctx, task)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -77,6 +80,7 @@ type refactorArgs struct {
|
|||||||
ImplPath string `json:"impl_path"`
|
ImplPath string `json:"impl_path"`
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
TestCmd string `json:"test_cmd"`
|
TestCmd string `json:"test_cmd"`
|
||||||
|
SessionID string `json:"session_id"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Skill) handleRefactor(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) {
|
func (s *Skill) handleRefactor(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) {
|
||||||
@@ -97,9 +101,25 @@ func (s *Skill) handleRefactor(ctx context.Context, raw json.RawMessage) (json.R
|
|||||||
"phase: refactor\nproject_root: %s\ntest_path: %s\nimpl_path: %s\nmodel: %s\ntest_cmd: %s",
|
"phase: refactor\nproject_root: %s\ntest_path: %s\nimpl_path: %s\nmodel: %s\ntest_cmd: %s",
|
||||||
args.ProjectRoot, args.TestPath, args.ImplPath, s.resolveModel(args.Model), args.TestCmd,
|
args.ProjectRoot, args.TestPath, args.ImplPath, s.resolveModel(args.Model), args.TestCmd,
|
||||||
)
|
)
|
||||||
|
task = s.prependHistory(args.SessionID, "refactor", task)
|
||||||
return s.execute(ctx, task)
|
return s.execute(ctx, task)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
|
||||||
|
if sessionID == "" || s.cfg.SessionsDir == "" {
|
||||||
|
return task
|
||||||
|
}
|
||||||
|
entries, err := session.Read(s.cfg.SessionsDir, sessionID)
|
||||||
|
if err != nil || len(entries) == 0 {
|
||||||
|
return task
|
||||||
|
}
|
||||||
|
history := session.FormatHistory(entries, currentPhase)
|
||||||
|
if history == "" {
|
||||||
|
return task
|
||||||
|
}
|
||||||
|
return history + "\n---\n\n" + task
|
||||||
|
}
|
||||||
|
|
||||||
func (s *Skill) resolveModel(override string) string {
|
func (s *Skill) resolveModel(override string) string {
|
||||||
if override != "" {
|
if override != "" {
|
||||||
return override
|
return override
|
||||||
|
|||||||
@@ -5,6 +5,8 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/session"
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/tdd"
|
"github.com/mathiasbq/supervisor/internal/skills/tdd"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
@@ -41,5 +43,43 @@ func TestTDDRedRequiresSpec(t *testing.T) {
|
|||||||
assert.ErrorContains(t, err, "spec")
|
assert.ErrorContains(t, err, "spec")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestTDDGreenInjectsSessionHistory(t *testing.T) {
|
||||||
|
sessDir := t.TempDir()
|
||||||
|
require.NoError(t, session.Append(sessDir, "sess-1", session.Entry{
|
||||||
|
SessionID: "sess-1", Skill: "tdd", Phase: "red", FinalStatus: "pass",
|
||||||
|
FilePath: "internal/foo/foo_test.go",
|
||||||
|
Message: "wrote failing test for Foo",
|
||||||
|
}))
|
||||||
|
|
||||||
|
var capturedPrompt string
|
||||||
|
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
||||||
|
capturedPrompt = req.TaskPrompt
|
||||||
|
return iexec.Result{Status: "pass", Phase: "green", Skill: "tdd", Verified: true, ModelUsed: "self", Message: "ok"}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", ExecutorFn: fakeFn, SessionsDir: sessDir})
|
||||||
|
_, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
|
||||||
|
`{"project_root":"/tmp","test_path":"internal/foo/foo_test.go","test_cmd":"go test ./...","session_id":"sess-1"}`,
|
||||||
|
))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, capturedPrompt, "## Session history")
|
||||||
|
assert.Contains(t, capturedPrompt, "wrote failing test for Foo")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTDDGreenNoHistoryWhenSessionIDEmpty(t *testing.T) {
|
||||||
|
var capturedPrompt string
|
||||||
|
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
||||||
|
capturedPrompt = req.TaskPrompt
|
||||||
|
return iexec.Result{Status: "pass", Phase: "green", Skill: "tdd", Verified: true, ModelUsed: "self", Message: "ok"}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", ExecutorFn: fakeFn, SessionsDir: t.TempDir()})
|
||||||
|
_, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
|
||||||
|
`{"project_root":"/tmp","test_path":"internal/foo/foo_test.go"}`,
|
||||||
|
))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.NotContains(t, capturedPrompt, "## Session history")
|
||||||
|
}
|
||||||
|
|
||||||
// Ensure require is used (avoids import error).
|
// Ensure require is used (avoids import error).
|
||||||
var _ = require.New
|
var _ = require.New
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ type Config struct {
|
|||||||
SkillPrompt string
|
SkillPrompt string
|
||||||
ExecutorFn ExecutorFn // nil = no executor (tests that don't reach execute())
|
ExecutorFn ExecutorFn // nil = no executor (tests that don't reach execute())
|
||||||
DefaultModel string
|
DefaultModel string
|
||||||
|
SessionsDir string // optional: path to brain/sessions/ for history injection
|
||||||
}
|
}
|
||||||
|
|
||||||
type Skill struct {
|
type Skill struct {
|
||||||
@@ -63,6 +64,7 @@ func (s *Skill) Tools() []registry.ToolDef {
|
|||||||
"test_path": strProp,
|
"test_path": strProp,
|
||||||
"model": strProp,
|
"model": strProp,
|
||||||
"test_cmd": strProp,
|
"test_cmd": strProp,
|
||||||
|
"session_id": strProp,
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
@@ -77,6 +79,7 @@ func (s *Skill) Tools() []registry.ToolDef {
|
|||||||
"impl_path": strProp,
|
"impl_path": strProp,
|
||||||
"model": strProp,
|
"model": strProp,
|
||||||
"test_cmd": strProp,
|
"test_cmd": strProp,
|
||||||
|
"session_id": strProp,
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
|||||||
80
internal/skills/trainer/handlers.go
Normal file
80
internal/skills/trainer/handlers.go
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
// internal/skills/trainer/handlers.go
|
||||||
|
package trainer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/session"
|
||||||
|
)
|
||||||
|
|
||||||
|
// trainArgs is the decoded JSON argument object of the "trainer" tool.
type trainArgs struct {
	// SessionID is read from the "session_id" key.
	SessionID string `json:"session_id"`
	// Model is read from the "model" key.
	Model string `json:"model"`
}
|
||||||
|
|
||||||
|
// Handle dispatches the MCP tool call to the trainer handler.
|
||||||
|
func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) {
|
||||||
|
if tool != "trainer" {
|
||||||
|
return nil, fmt.Errorf("unknown tool: %s", tool)
|
||||||
|
}
|
||||||
|
var a trainArgs
|
||||||
|
if err := json.Unmarshal(args, &a); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse args: %w", err)
|
||||||
|
}
|
||||||
|
if a.SessionID == "" {
|
||||||
|
return nil, fmt.Errorf("session_id is required")
|
||||||
|
}
|
||||||
|
if s.cfg.ExecutorFn == nil {
|
||||||
|
return nil, fmt.Errorf("no executor configured")
|
||||||
|
}
|
||||||
|
|
||||||
|
model := a.Model
|
||||||
|
if model == "" {
|
||||||
|
model = s.cfg.DefaultModel
|
||||||
|
}
|
||||||
|
|
||||||
|
entries, err := session.Read(s.cfg.SessionsDir, a.SessionID)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("read session log: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Step 1: Reader agent ─────────────────────────────────────────────────
|
||||||
|
history := session.FormatHistory(entries, "")
|
||||||
|
readerTask := fmt.Sprintf(
|
||||||
|
"role: reader\nsession_id: %s\nbrain_dir: %s\n\n%s",
|
||||||
|
a.SessionID, s.cfg.BrainDir, history,
|
||||||
|
)
|
||||||
|
readerResult, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
||||||
|
SkillPrompt: s.cfg.ReaderPrompt,
|
||||||
|
TaskPrompt: readerTask,
|
||||||
|
Model: model,
|
||||||
|
Tools: "Read",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("reader agent: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Step 2: Writer agent (receives reader candidates) ────────────────────
|
||||||
|
writerTask := fmt.Sprintf(
|
||||||
|
"role: writer\nsession_id: %s\nbrain_dir: %s\n\nreader_summary: %s\nreader_candidates:\n%s",
|
||||||
|
a.SessionID, s.cfg.BrainDir, readerResult.Message, readerResult.RunnerOutput,
|
||||||
|
)
|
||||||
|
writerResult, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
||||||
|
SkillPrompt: s.cfg.WriterPrompt,
|
||||||
|
TaskPrompt: writerTask,
|
||||||
|
Model: model,
|
||||||
|
Tools: "Read,Write",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("writer agent: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
b, err := json.Marshal(writerResult)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("marshal result: %w", err)
|
||||||
|
}
|
||||||
|
return b, nil
|
||||||
|
}
|
||||||
82
internal/skills/trainer/handlers_test.go
Normal file
82
internal/skills/trainer/handlers_test.go
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
// internal/skills/trainer/handlers_test.go
|
||||||
|
package trainer_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/session"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/skills/trainer"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestTrainerToolRegistered(t *testing.T) {
|
||||||
|
sk := trainer.New(trainer.Config{ReaderPrompt: "r", WriterPrompt: "w"})
|
||||||
|
names := make([]string, 0)
|
||||||
|
for _, tool := range sk.Tools() {
|
||||||
|
names = append(names, tool.Name)
|
||||||
|
}
|
||||||
|
assert.Contains(t, names, "trainer")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTrainerRequiresSessionID(t *testing.T) {
|
||||||
|
sk := trainer.New(trainer.Config{ReaderPrompt: "r", WriterPrompt: "w"})
|
||||||
|
_, err := sk.Handle(context.Background(), "trainer", json.RawMessage(`{}`))
|
||||||
|
assert.ErrorContains(t, err, "session_id")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTrainerCallsReaderThenWriter(t *testing.T) {
|
||||||
|
sessDir := t.TempDir()
|
||||||
|
require.NoError(t, session.Append(sessDir, "sess-1", session.Entry{
|
||||||
|
SessionID: "sess-1", Skill: "tdd", Phase: "red", FinalStatus: "pass",
|
||||||
|
Message: "wrote failing test", FilePath: "internal/foo/foo_test.go",
|
||||||
|
}))
|
||||||
|
|
||||||
|
callCount := 0
|
||||||
|
var readerTask, writerTask string
|
||||||
|
|
||||||
|
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
||||||
|
callCount++
|
||||||
|
if callCount == 1 {
|
||||||
|
// reader call
|
||||||
|
readerTask = req.TaskPrompt
|
||||||
|
return iexec.Result{
|
||||||
|
Status: "pass", Phase: "trainer", Skill: "trainer",
|
||||||
|
RunnerOutput: `[{"type":"sft","moment":"first-pass clean TDD","score":4}]`,
|
||||||
|
Verified: true, ModelUsed: "self", Message: "1 sft candidate found",
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
// writer call
|
||||||
|
writerTask = req.TaskPrompt
|
||||||
|
return iexec.Result{
|
||||||
|
Status: "pass", Phase: "trainer", Skill: "trainer",
|
||||||
|
FilePath: sessDir + "/training-data/sft/sess-1.jsonl",
|
||||||
|
Verified: true, ModelUsed: "self", Message: "1 sft pair written",
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
sk := trainer.New(trainer.Config{
|
||||||
|
ReaderPrompt: "reader rules",
|
||||||
|
WriterPrompt: "writer rules",
|
||||||
|
ExecutorFn: fakeFn,
|
||||||
|
SessionsDir: sessDir,
|
||||||
|
BrainDir: t.TempDir(),
|
||||||
|
})
|
||||||
|
out, err := sk.Handle(context.Background(), "trainer", json.RawMessage(`{"session_id":"sess-1"}`))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
assert.Equal(t, 2, callCount, "executor must be called exactly twice: reader then writer")
|
||||||
|
assert.Contains(t, readerTask, "role: reader")
|
||||||
|
assert.Contains(t, readerTask, "sess-1")
|
||||||
|
assert.Contains(t, readerTask, "wrote failing test") // session history in reader prompt
|
||||||
|
assert.Contains(t, writerTask, "role: writer")
|
||||||
|
assert.Contains(t, writerTask, "sft candidate") // reader output passed to writer
|
||||||
|
|
||||||
|
var result iexec.Result
|
||||||
|
require.NoError(t, json.Unmarshal(out, &result))
|
||||||
|
assert.Equal(t, "trainer", result.Phase)
|
||||||
|
assert.Equal(t, "pass", result.Status)
|
||||||
|
}
|
||||||
53
internal/skills/trainer/skill.go
Normal file
53
internal/skills/trainer/skill.go
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
// internal/skills/trainer/skill.go
|
||||||
|
package trainer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
|
||||||
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/registry"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ExecutorFn is the function signature for running a worker subprocess.
|
||||||
|
type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
|
||||||
|
|
||||||
|
// Config holds dependencies for the trainer skill.
|
||||||
|
type Config struct {
|
||||||
|
ReaderPrompt string
|
||||||
|
WriterPrompt string
|
||||||
|
DefaultModel string
|
||||||
|
ExecutorFn ExecutorFn
|
||||||
|
SessionsDir string
|
||||||
|
BrainDir string // root of brain/ directory; writer writes to BrainDir/training-data/
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skill implements the trainer MCP tool.
|
||||||
|
type Skill struct{ cfg Config }
|
||||||
|
|
||||||
|
// New creates a new trainer Skill.
|
||||||
|
func New(cfg Config) *Skill { return &Skill{cfg: cfg} }
|
||||||
|
|
||||||
|
// Name returns the skill identifier.
|
||||||
|
func (s *Skill) Name() string { return "trainer" }
|
||||||
|
|
||||||
|
// Tools returns the MCP tool definitions for this skill.
|
||||||
|
func (s *Skill) Tools() []registry.ToolDef {
|
||||||
|
schema := func(required []string, props map[string]any) json.RawMessage {
|
||||||
|
b, _ := json.Marshal(map[string]any{"type": "object", "required": required, "properties": props})
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
return []registry.ToolDef{
|
||||||
|
{
|
||||||
|
Name: "trainer",
|
||||||
|
Description: "Extract SFT and DPO training pairs from a session log. Runs a reader→writer chain: reader identifies learning moments, writer formats and writes pairs to brain/training-data/.",
|
||||||
|
InputSchema: schema(
|
||||||
|
[]string{"session_id"},
|
||||||
|
map[string]any{
|
||||||
|
"session_id": map[string]any{"type": "string"},
|
||||||
|
"model": map[string]any{"type": "string"},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user