Compare commits
83 Commits
a0cfc866df
...
v0.4.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3e9a648115 | ||
|
|
923a665365 | ||
|
|
537aebc302 | ||
|
|
de35d4dbb0 | ||
|
|
26855f69b0 | ||
|
|
a7b363d589 | ||
|
|
7b57051af8 | ||
|
|
a620f6cb01 | ||
|
|
26b5636b43 | ||
|
|
989f375aec | ||
|
|
6403d5e444 | ||
|
|
ab19968ae2 | ||
|
|
1605624668 | ||
|
|
55fa0b503a | ||
|
|
3c2bd9268c | ||
|
|
29727ec2a5 | ||
|
|
0a075088b2 | ||
|
|
1bfe501d09 | ||
|
|
3607920601 | ||
|
|
a6c39e8691 | ||
|
|
a37d18bf7a | ||
|
|
2975eadc87 | ||
|
|
53e46781b1 | ||
|
|
e9b5cc401c | ||
|
|
bf6f497d9d | ||
|
|
9cc6c2d053 | ||
|
|
43a46d07e5 | ||
|
|
820d1c93a7 | ||
|
|
6928907d79 | ||
|
|
e74320a8e8 | ||
|
|
1b0706f270 | ||
|
|
2ae6bfe81e | ||
|
|
a6dce972d6 | ||
|
|
2f4b577131 | ||
|
|
a25bb18c54 | ||
|
|
78531bb238 | ||
|
|
04fefe8e9c | ||
|
|
103f4d90bf | ||
|
|
9b11719481 | ||
|
|
d405346f07 | ||
|
|
bf8a3fc11c | ||
|
|
ae5a4d04f0 | ||
|
|
3a0424a6b4 | ||
|
|
08dd7b9365 | ||
|
|
91e02b930c | ||
|
|
c7341a2607 | ||
|
|
b5a0085c0a | ||
|
|
d6daa37c71 | ||
|
|
62fc3989f2 | ||
|
|
c9310b1079 | ||
|
|
ca8a691241 | ||
|
|
214f607007 | ||
|
|
0e08dfffb8 | ||
|
|
caef05bea4 | ||
|
|
ca1a16873c | ||
|
|
63c238c650 | ||
|
|
ce45592730 | ||
|
|
823de23213 | ||
|
|
78d3939caa | ||
|
|
f2bc39b500 | ||
|
|
3625e1268d | ||
|
|
47df642836 | ||
|
|
235d70ad0b | ||
|
|
7d5289ac54 | ||
|
|
3d8fc9dacd | ||
|
|
f9f804cd49 | ||
|
|
85f142ade0 | ||
|
|
0dfad02513 | ||
|
|
c44eb680b2 | ||
|
|
38ada998a2 | ||
|
|
74547c2bdf | ||
|
|
587c0d3b1c | ||
|
|
bb61f2992b | ||
|
|
3ba72d9b28 | ||
|
|
b4f0fbc3ea | ||
|
|
12943ee6f4 | ||
|
|
9af95ebd96 | ||
|
|
f0b567f3e6 | ||
|
|
e3d6cf4cf5 | ||
|
|
df59bd010c | ||
|
|
e5152151d6 | ||
|
|
aa2d57e619 | ||
|
|
6b53706987 |
@@ -1,31 +1,33 @@
|
|||||||
name: cd
|
name: cd
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
workflow_run:
|
||||||
|
workflows: ["CI"]
|
||||||
|
types: [completed]
|
||||||
branches: [main]
|
branches: [main]
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
deploy:
|
deploy:
|
||||||
name: Build and deploy
|
name: Build and deploy
|
||||||
needs: [check]
|
|
||||||
runs-on: self-hosted
|
runs-on: self-hosted
|
||||||
|
if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push' }}
|
||||||
env:
|
env:
|
||||||
SERVICE: supervisor
|
SERVICE: supervisor
|
||||||
IMAGE: gitea.d-ma.be/mathias/supervisor
|
IMAGE: gitea.d-ma.be/mathias/supervisor
|
||||||
|
INGESTION_IMAGE: gitea.d-ma.be/mathias/ingestion
|
||||||
INFRA_REPO: git@gitea.d-ma.be:mathias/infra.git
|
INFRA_REPO: git@gitea.d-ma.be:mathias/infra.git
|
||||||
BUILDKIT_HOST: unix:///run/buildkit/buildkitd.sock
|
BUILDKIT_HOST: unix:///run/buildkit/buildkitd.sock
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Build and push image
|
- name: Build and push supervisor image
|
||||||
run: |
|
run: |
|
||||||
set -e
|
set -e
|
||||||
trap 'rm -f /tmp/supervisor-image.tar' EXIT
|
trap 'rm -f /tmp/supervisor-image.tar' EXIT
|
||||||
IMAGE_TAG="${{ github.sha }}"
|
IMAGE_TAG="${{ github.sha }}"
|
||||||
echo "Building ${IMAGE}:${IMAGE_TAG}"
|
echo "Building ${IMAGE}:${IMAGE_TAG}"
|
||||||
|
|
||||||
# Build to local OCI tar (no registry auth needed at build time)
|
|
||||||
buildctl --addr "${BUILDKIT_HOST}" build \
|
buildctl --addr "${BUILDKIT_HOST}" build \
|
||||||
--frontend dockerfile.v0 \
|
--frontend dockerfile.v0 \
|
||||||
--local context=. \
|
--local context=. \
|
||||||
@@ -33,7 +35,6 @@ jobs:
|
|||||||
--opt build-arg:VERSION="${IMAGE_TAG}" \
|
--opt build-arg:VERSION="${IMAGE_TAG}" \
|
||||||
--output type=oci,dest=/tmp/supervisor-image.tar
|
--output type=oci,dest=/tmp/supervisor-image.tar
|
||||||
|
|
||||||
# Push with skopeo using simple credential flag (avoids OAuth token flow)
|
|
||||||
skopeo copy \
|
skopeo copy \
|
||||||
oci-archive:/tmp/supervisor-image.tar \
|
oci-archive:/tmp/supervisor-image.tar \
|
||||||
docker://${IMAGE}:${IMAGE_TAG} \
|
docker://${IMAGE}:${IMAGE_TAG} \
|
||||||
@@ -41,6 +42,26 @@ jobs:
|
|||||||
|
|
||||||
echo "Built and pushed ${IMAGE}:${IMAGE_TAG}"
|
echo "Built and pushed ${IMAGE}:${IMAGE_TAG}"
|
||||||
|
|
||||||
|
- name: Build and push ingestion image
|
||||||
|
run: |
|
||||||
|
set -e
|
||||||
|
trap 'rm -f /tmp/ingestion-image.tar' EXIT
|
||||||
|
IMAGE_TAG="${{ github.sha }}"
|
||||||
|
echo "Building ${INGESTION_IMAGE}:${IMAGE_TAG}"
|
||||||
|
|
||||||
|
buildctl --addr "${BUILDKIT_HOST}" build \
|
||||||
|
--frontend dockerfile.v0 \
|
||||||
|
--local context=ingestion \
|
||||||
|
--local dockerfile=ingestion \
|
||||||
|
--output type=oci,dest=/tmp/ingestion-image.tar
|
||||||
|
|
||||||
|
skopeo copy \
|
||||||
|
oci-archive:/tmp/ingestion-image.tar \
|
||||||
|
docker://${INGESTION_IMAGE}:${IMAGE_TAG} \
|
||||||
|
--dest-creds "${{ secrets.REGISTRY_CREDS }}"
|
||||||
|
|
||||||
|
echo "Built and pushed ${INGESTION_IMAGE}:${IMAGE_TAG}"
|
||||||
|
|
||||||
- name: Update infra repo
|
- name: Update infra repo
|
||||||
run: |
|
run: |
|
||||||
set -e
|
set -e
|
||||||
@@ -49,20 +70,24 @@ jobs:
|
|||||||
mkdir -p ~/.ssh
|
mkdir -p ~/.ssh
|
||||||
echo "${{ secrets.INFRA_DEPLOY_KEY }}" > ~/.ssh/infra_deploy_key
|
echo "${{ secrets.INFRA_DEPLOY_KEY }}" > ~/.ssh/infra_deploy_key
|
||||||
chmod 600 ~/.ssh/infra_deploy_key
|
chmod 600 ~/.ssh/infra_deploy_key
|
||||||
ssh-keyscan gitea.d-ma.be >> ~/.ssh/known_hosts 2>/dev/null
|
printf 'Host gitea.d-ma.be\n HostName 127.0.0.1\n Port 30022\n StrictHostKeyChecking no\n' >> ~/.ssh/config
|
||||||
|
|
||||||
GIT_SSH_COMMAND="ssh -i ~/.ssh/infra_deploy_key -o IdentitiesOnly=yes" \
|
GIT_SSH_COMMAND="ssh -i ~/.ssh/infra_deploy_key -o IdentitiesOnly=yes" \
|
||||||
git clone "${INFRA_REPO}" /tmp/infra-update
|
git clone "${INFRA_REPO}" /tmp/infra-update
|
||||||
|
|
||||||
cd /tmp/infra-update
|
cd /tmp/infra-update
|
||||||
|
|
||||||
sed -i "s|gitea.d-ma.be/mathias/supervisor:.*|gitea.d-ma.be/mathias/supervisor:${IMAGE_TAG}|" \
|
sed -i "s|gitea.d-ma.be/mathias/supervisor:.*|gitea.d-ma.be/mathias/supervisor:${IMAGE_TAG}|" \
|
||||||
"k3s/apps/${SERVICE}/deployment.yaml"
|
"k3s/apps/${SERVICE}/deployment.yaml"
|
||||||
|
|
||||||
|
sed -i "s|gitea.d-ma.be/mathias/ingestion:.*|gitea.d-ma.be/mathias/ingestion:${IMAGE_TAG}|" \
|
||||||
|
"k3s/apps/${SERVICE}/ingestion-deployment.yaml"
|
||||||
|
|
||||||
git config user.email "cd-bot@d-ma.be"
|
git config user.email "cd-bot@d-ma.be"
|
||||||
git config user.name "CD Bot"
|
git config user.name "CD Bot"
|
||||||
git add "k3s/apps/${SERVICE}/deployment.yaml"
|
git add "k3s/apps/${SERVICE}/deployment.yaml" "k3s/apps/${SERVICE}/ingestion-deployment.yaml"
|
||||||
git commit -m "chore(deploy): ${SERVICE} → ${IMAGE_TAG}"
|
git commit -m "chore(deploy): ${SERVICE}+ingestion → ${IMAGE_TAG}"
|
||||||
GIT_SSH_COMMAND="ssh -i ~/.ssh/infra_deploy_key -o IdentitiesOnly=yes" \
|
GIT_SSH_COMMAND="ssh -i ~/.ssh/infra_deploy_key -o IdentitiesOnly=yes" \
|
||||||
git push
|
git push
|
||||||
|
|
||||||
echo "Infra repo updated: ${SERVICE} → ${IMAGE_TAG}"
|
echo "Infra repo updated: ${SERVICE}+ingestion → ${IMAGE_TAG}"
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -34,6 +34,7 @@ secrets/
|
|||||||
# ── Documented examples (commit these) ──
|
# ── Documented examples (commit these) ──
|
||||||
!.env.example
|
!.env.example
|
||||||
!config/supervisor/CLAUDE.md
|
!config/supervisor/CLAUDE.md
|
||||||
|
!brain/CLAUDE.md
|
||||||
|
|
||||||
# IDE
|
# IDE
|
||||||
.idea/
|
.idea/
|
||||||
|
|||||||
10
.mcp.json
Normal file
10
.mcp.json
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"mcpServers": {
|
||||||
|
"supervisor": {
|
||||||
|
"command": "/Users/mathias/dev/AI/supervisor/bin/supervisor-bridge",
|
||||||
|
"env": {
|
||||||
|
"SUPERVISOR_URL": "http://koala:30320/mcp"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
23
DECISIONS.md
23
DECISIONS.md
@@ -44,6 +44,29 @@ Record *why* things are the way they are. Future-you will thank present-you.
|
|||||||
|
|
||||||
**Consequences**: More operational complexity than Chroma, but isolation is non-negotiable for client work.
|
**Consequences**: More operational complexity than Chroma, but isolation is non-negotiable for client work.
|
||||||
|
|
||||||
|
## 2026-04-22 — Hyperguild scope reset: drop parametric learning, simplify brain
|
||||||
|
|
||||||
|
**Context**: After shipping Phases 1–4 (MCP server, 6 skills, model orchestration, session logging, CD pipeline), we critically reviewed what was theater vs genuinely useful.
|
||||||
|
|
||||||
|
**Decisions**:
|
||||||
|
|
||||||
|
1. **Drop the parametric learning pipeline.** SFT/DPO/RL extraction, `brain/training-data/` directory structure, Axolotl/LLaMA-Factory fine-tuning loop — all cut. The loop requires thousands of high-quality examples to move the needle, which a solo consultant won't generate. Better base models ship faster than any fine-tuning effort could keep up with. This is a research project, not a productivity tool.
|
||||||
|
|
||||||
|
2. **Simplify the brain to plain markdown.** `brain/knowledge/` replaces `brain/wiki/ + brain/raw/ + brain/training-data/`. The trainer and retrospective workers write markdown entries. `brain_query` searches markdown. No ingestion pipeline, no tagging for significance review, no structured JSONL formats.
|
||||||
|
|
||||||
|
3. **Measure the escalation chain before assuming it's useful.** Local model (phi4) only belongs in a skill's chain if it passes Claude verification at a meaningful rate. Where it fails >70% of the time, it adds cost not value. Per-skill hit rate logging is the prerequisite to honest chain configuration.
|
||||||
|
|
||||||
|
4. **Keep what's real**: MCP tool surface, session logging with attempt records, tier detection, CD pipeline, bridge to Claude Code.
|
||||||
|
|
||||||
|
**What to build next** (in priority order):
|
||||||
|
- `brain_query` injection into skill handlers before spawning workers — this makes the declarative brain actually function
|
||||||
|
- `protocols.md` — behavioral contract injected into every worker prompt
|
||||||
|
- Per-skill pass rate logging and chain tuning
|
||||||
|
|
||||||
|
**Consequences**: Simpler system with a shorter feedback loop. The brain becomes real only when skill handlers query it. Training data ambitions deferred indefinitely — revisit if local model capabilities improve enough that fine-tuning becomes worthwhile.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## 2026-04-08 — Mistral Vibe gets its own adapter
|
## 2026-04-08 — Mistral Vibe gets its own adapter
|
||||||
|
|
||||||
**Context**: Vibe doesn't read `AGENTS.md` — it uses `~/.vibe/prompts/` and `~/.vibe/agents/` with TOML config.
|
**Context**: Vibe doesn't read `AGENTS.md` — it uses `~/.vibe/prompts/` and `~/.vibe/agents/` with TOML config.
|
||||||
|
|||||||
4
Procfile
4
Procfile
@@ -1,2 +1,2 @@
|
|||||||
ingestion: cd ingestion && INGEST_BRAIN_DIR=../brain INGEST_PORT=3300 go run ./cmd/server/
|
ingestion: cd ingestion && INGEST_BRAIN_DIR=../brain INGEST_PORT=3300 INGEST_WATCH_INTERVAL=30 go run ./cmd/server/
|
||||||
supervisor: SUPERVISOR_CONFIG_DIR=./config/supervisor SUPERVISOR_MODELS_FILE=./config/models.yaml SUPERVISOR_SESSIONS_DIR=./brain/sessions INGEST_BASE_URL=http://localhost:3300 go run ./cmd/supervisor/
|
supervisor: SUPERVISOR_CONFIG_DIR=./config/supervisor SUPERVISOR_MODELS_FILE=./config/models.yaml SUPERVISOR_SESSIONS_DIR=./brain/sessions INGEST_BASE_URL=http://localhost:3300 INGEST_SVC_URL=http://localhost:3300 go run ./cmd/supervisor/
|
||||||
|
|||||||
137
brain/schema.md
Normal file
137
brain/schema.md
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
# Brain Wiki Schema
|
||||||
|
|
||||||
|
This document defines the three page types in the brain wiki.
|
||||||
|
The LLM must follow this schema exactly when generating wiki pages.
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
Return a JSON array. Each element:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"title": "exact page title",
|
||||||
|
"type": "source | concept | entity",
|
||||||
|
"subtype": "see below — omit for concept",
|
||||||
|
"domain": "see domains — omit if none fits",
|
||||||
|
"content": "Markdown body only — no frontmatter, no path"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- `subtype` for **source**: `article | pdf | book | video | note | project`
|
||||||
|
- `subtype` for **entity**: `person | company | tool | model | framework | technology`
|
||||||
|
- The pipeline computes slugs and frontmatter — never include them in output.
|
||||||
|
|
||||||
|
## Wikilink Format
|
||||||
|
|
||||||
|
All cross-references use `[[Display Name]]` — just the display name, no slug, no pipe.
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Only link to pages in the inventory or pages you are creating in this response
|
||||||
|
- The pipeline converts `[[Display Name]]` to `[[slug|Display Name]]` automatically
|
||||||
|
- Section links must match their section type (Related Concepts → concept pages only, etc.)
|
||||||
|
|
||||||
|
Examples: `[[Domain Driven Design]]`, `[[Ryan Singer]]`, `[[Shape Up]]`
|
||||||
|
|
||||||
|
## Domains
|
||||||
|
|
||||||
|
Use one of: `ai-llm`, `software-engineering`, `product-strategy`, `finance-markets`,
|
||||||
|
`personal`, `consulting`, `climate`, `infrastructure`, `security`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Source Pages — wiki/sources/<slug>.md
|
||||||
|
|
||||||
|
One page per ingested source. Books are NEVER split across multiple source pages — update the existing one.
|
||||||
|
|
||||||
|
Body sections (in this order):
|
||||||
|
|
||||||
|
### Summary
|
||||||
|
2–3 sentences. Core argument or finding.
|
||||||
|
|
||||||
|
### Key Claims
|
||||||
|
Bulleted list. Paraphrase — no verbatim quotes or code.
|
||||||
|
|
||||||
|
### Concepts Introduced or Reinforced
|
||||||
|
Wikilinks to concept pages ONLY. One per line.
|
||||||
|
|
||||||
|
### Entities Mentioned
|
||||||
|
Wikilinks to entity pages ONLY. One per line.
|
||||||
|
|
||||||
|
### Open Questions Raised
|
||||||
|
Gaps or follow-up questions from this source.
|
||||||
|
|
||||||
|
For books only, also add:
|
||||||
|
|
||||||
|
### Chapters
|
||||||
|
One bullet per chapter with 1–2 sentence summary.
|
||||||
|
|
||||||
|
### Argument Arc
|
||||||
|
Overall narrative as it becomes clear across chapters.
|
||||||
|
|
||||||
|
### Updates
|
||||||
|
Dated entries appended on re-ingestion. NEVER rewrite — only append.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Concept Pages — wiki/concepts/<slug>.md
|
||||||
|
|
||||||
|
One page per idea, framework, methodology, or pattern.
|
||||||
|
|
||||||
|
Body sections (in this order):
|
||||||
|
|
||||||
|
### Definition
|
||||||
|
One-paragraph plain-language explanation.
|
||||||
|
|
||||||
|
### Why It Matters
|
||||||
|
Practical significance. Why should anyone care?
|
||||||
|
|
||||||
|
### Related Concepts
|
||||||
|
Wikilinks to concept pages ONLY.
|
||||||
|
|
||||||
|
### Related Entities
|
||||||
|
Wikilinks to entity pages ONLY.
|
||||||
|
|
||||||
|
### Sources
|
||||||
|
Wikilinks to source pages ONLY.
|
||||||
|
|
||||||
|
### Evolving Notes
|
||||||
|
Updated as new sources arrive. Append, do not rewrite.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Entity Pages — wiki/entities/<slug>.md
|
||||||
|
|
||||||
|
One page per person, tool, organisation, technology, or product.
|
||||||
|
|
||||||
|
Body sections (in this order):
|
||||||
|
|
||||||
|
### Description
|
||||||
|
One-line description.
|
||||||
|
|
||||||
|
### Relevance
|
||||||
|
Why this entity matters to this knowledge base.
|
||||||
|
|
||||||
|
### Key Positions, Products, or Claims
|
||||||
|
With dates where known.
|
||||||
|
|
||||||
|
### Related Concepts
|
||||||
|
Wikilinks to concept pages ONLY.
|
||||||
|
|
||||||
|
### Related Entities
|
||||||
|
Wikilinks to entity pages ONLY.
|
||||||
|
|
||||||
|
### Sources
|
||||||
|
Wikilinks to source pages ONLY.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Non-Negotiable Rules
|
||||||
|
|
||||||
|
1. Output ONLY a valid JSON array — no markdown fences, no prose before or after
|
||||||
|
2. Each element: `{"title": "...", "type": "...", "subtype": "...", "domain": "...", "content": "..."}`
|
||||||
|
3. Never include slugs, paths, or frontmatter in output — the pipeline handles these
|
||||||
|
4. Wikilinks: `[[Display Name]]` only — no pipe, no slug
|
||||||
|
5. Dates always YYYY-MM-DD (used only in content body where contextually relevant)
|
||||||
|
6. Never reproduce verbatim code — describe the pattern or technique
|
||||||
|
7. Section links must match their section type
|
||||||
|
8. One source page per book — if inventory shows it exists, include it as an UPDATE
|
||||||
@@ -37,12 +37,17 @@ func main() {
|
|||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
systemPrompt, err := os.ReadFile(cfg.ConfigDir + "/CLAUDE.md")
|
protocolsPrompt, err := os.ReadFile(cfg.ConfigDir + "/protocols.md")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("read supervisor CLAUDE.md", "path", cfg.ConfigDir+"/CLAUDE.md", "err", err)
|
logger.Error("read protocols.md", "path", cfg.ConfigDir+"/protocols.md", "err", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// prependProtocols prepends the shared protocols to a skill discipline file.
|
||||||
|
prependProtocols := func(skillPrompt []byte) string {
|
||||||
|
return string(protocolsPrompt) + "\n---\n\n" + string(skillPrompt)
|
||||||
|
}
|
||||||
|
|
||||||
tddPrompt, err := os.ReadFile(cfg.ConfigDir + "/tdd.md")
|
tddPrompt, err := os.ReadFile(cfg.ConfigDir + "/tdd.md")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("read tdd.md", "path", cfg.ConfigDir+"/tdd.md", "err", err)
|
logger.Error("read tdd.md", "path", cfg.ConfigDir+"/tdd.md", "err", err)
|
||||||
@@ -84,26 +89,7 @@ func main() {
|
|||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
claudeExec := iexec.New(iexec.Config{
|
litellm := iexec.NewLiteLLM(cfg.LiteLLMBaseURL, cfg.LiteLLMAPIKey, 0)
|
||||||
SystemPrompt: string(systemPrompt),
|
|
||||||
LiteLLMBaseURL: cfg.LiteLLMBaseURL,
|
|
||||||
LiteLLMAPIKey: cfg.LiteLLMAPIKey,
|
|
||||||
})
|
|
||||||
litellmExec := iexec.NewLiteLLM(cfg.LiteLLMBaseURL, cfg.LiteLLMAPIKey, 0)
|
|
||||||
verifier := iexec.NewVerifier("", models.Verifier(), 0)
|
|
||||||
|
|
||||||
buildOrch := func(skill string) func(ctx context.Context, req iexec.Request) (iexec.Result, error) {
|
|
||||||
return func(ctx context.Context, req iexec.Request) (iexec.Result, error) {
|
|
||||||
rawChain := models.ChainFor(skill, req.Model)
|
|
||||||
chain := make([]iexec.ChainEntry, len(rawChain))
|
|
||||||
for i, m := range rawChain {
|
|
||||||
chain[i] = iexec.EntryFor(m)
|
|
||||||
}
|
|
||||||
attempts := make([]iexec.AttemptRecord, 0, len(chain))
|
|
||||||
orch := iexec.NewOrchestrator(chain, litellmExec.Run, claudeExec.Run, verifier, models.LlamaSwapURL(), &attempts)
|
|
||||||
return orch.Run(ctx, req)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
tierFn := func(ctx context.Context) tier.Info {
|
tierFn := func(ctx context.Context) tier.Info {
|
||||||
return tier.Detect(ctx, "https://api.anthropic.com", cfg.LiteLLMBaseURL)
|
return tier.Detect(ctx, "https://api.anthropic.com", cfg.LiteLLMBaseURL)
|
||||||
@@ -111,14 +97,16 @@ func main() {
|
|||||||
|
|
||||||
reg := registry.New()
|
reg := registry.New()
|
||||||
reg.Register(tdd.New(tdd.Config{
|
reg.Register(tdd.New(tdd.Config{
|
||||||
SystemPrompt: string(systemPrompt),
|
SkillPrompt: prependProtocols(tddPrompt),
|
||||||
SkillPrompt: string(tddPrompt),
|
DefaultModel: models.ModelFor("tdd", ""),
|
||||||
DefaultModel: models.ChainFor("tdd", "")[0],
|
CompleteFunc: litellm.Complete,
|
||||||
ExecutorFn: buildOrch("tdd"),
|
|
||||||
SessionsDir: cfg.SessionsDir,
|
SessionsDir: cfg.SessionsDir,
|
||||||
|
IngestBaseURL: cfg.IngestBaseURL,
|
||||||
}))
|
}))
|
||||||
reg.Register(brain.New(brain.Config{
|
reg.Register(brain.New(brain.Config{
|
||||||
IngestBaseURL: cfg.IngestBaseURL,
|
IngestBaseURL: cfg.IngestBaseURL,
|
||||||
|
IngestSvcURL: cfg.IngestSvcURL,
|
||||||
|
KBRetrievalURL: cfg.KBRetrievalURL,
|
||||||
}))
|
}))
|
||||||
reg.Register(org.New(org.Config{
|
reg.Register(org.New(org.Config{
|
||||||
TierFn: tierFn,
|
TierFn: tierFn,
|
||||||
@@ -127,34 +115,37 @@ func main() {
|
|||||||
SessionsDir: cfg.SessionsDir,
|
SessionsDir: cfg.SessionsDir,
|
||||||
}))
|
}))
|
||||||
reg.Register(retrospective.New(retrospective.Config{
|
reg.Register(retrospective.New(retrospective.Config{
|
||||||
SkillPrompt: string(retroPrompt),
|
SkillPrompt: prependProtocols(retroPrompt),
|
||||||
DefaultModel: models.ChainFor("retrospective", "")[0],
|
DefaultModel: models.ModelFor("retrospective", ""),
|
||||||
SessionsDir: cfg.SessionsDir,
|
SessionsDir: cfg.SessionsDir,
|
||||||
ExecutorFn: buildOrch("retrospective"),
|
CompleteFunc: litellm.Complete,
|
||||||
}))
|
}))
|
||||||
reg.Register(review.New(review.Config{
|
reg.Register(review.New(review.Config{
|
||||||
SkillPrompt: string(reviewPrompt),
|
SkillPrompt: prependProtocols(reviewPrompt),
|
||||||
DefaultModel: models.ChainFor("review", "")[0],
|
DefaultModel: models.ModelFor("review", ""),
|
||||||
ExecutorFn: buildOrch("review"),
|
CompleteFunc: litellm.Complete,
|
||||||
SessionsDir: cfg.SessionsDir,
|
SessionsDir: cfg.SessionsDir,
|
||||||
|
IngestBaseURL: cfg.IngestBaseURL,
|
||||||
}))
|
}))
|
||||||
reg.Register(skilldebug.New(skilldebug.Config{
|
reg.Register(skilldebug.New(skilldebug.Config{
|
||||||
SkillPrompt: string(debugPrompt),
|
SkillPrompt: prependProtocols(debugPrompt),
|
||||||
DefaultModel: models.ChainFor("debug", "")[0],
|
DefaultModel: models.ModelFor("debug", ""),
|
||||||
ExecutorFn: buildOrch("debug"),
|
CompleteFunc: litellm.Complete,
|
||||||
SessionsDir: cfg.SessionsDir,
|
SessionsDir: cfg.SessionsDir,
|
||||||
|
IngestBaseURL: cfg.IngestBaseURL,
|
||||||
}))
|
}))
|
||||||
reg.Register(spec.New(spec.Config{
|
reg.Register(spec.New(spec.Config{
|
||||||
SkillPrompt: string(specPrompt),
|
SkillPrompt: prependProtocols(specPrompt),
|
||||||
DefaultModel: models.ChainFor("spec", "")[0],
|
DefaultModel: models.ModelFor("spec", ""),
|
||||||
ExecutorFn: buildOrch("spec"),
|
CompleteFunc: litellm.Complete,
|
||||||
SessionsDir: cfg.SessionsDir,
|
SessionsDir: cfg.SessionsDir,
|
||||||
|
IngestBaseURL: cfg.IngestBaseURL,
|
||||||
}))
|
}))
|
||||||
reg.Register(trainer.New(trainer.Config{
|
reg.Register(trainer.New(trainer.Config{
|
||||||
ReaderPrompt: string(trainerReaderPrompt),
|
ReaderPrompt: prependProtocols(trainerReaderPrompt),
|
||||||
WriterPrompt: string(trainerWriterPrompt),
|
WriterPrompt: prependProtocols(trainerWriterPrompt),
|
||||||
DefaultModel: models.ChainFor("trainer", "")[0],
|
DefaultModel: models.ModelFor("trainer", ""),
|
||||||
ExecutorFn: buildOrch("trainer"),
|
CompleteFunc: litellm.Complete,
|
||||||
SessionsDir: cfg.SessionsDir,
|
SessionsDir: cfg.SessionsDir,
|
||||||
BrainDir: cfg.BrainDir,
|
BrainDir: cfg.BrainDir,
|
||||||
}))
|
}))
|
||||||
@@ -164,7 +155,7 @@ func main() {
|
|||||||
mux.Handle("/mcp", srv)
|
mux.Handle("/mcp", srv)
|
||||||
|
|
||||||
addr := ":" + cfg.Port
|
addr := ":" + cfg.Port
|
||||||
logger.Info("supervisor starting", "addr", addr)
|
logger.Info("supervisor starting", "addr", addr, "version", "v0.5.0")
|
||||||
if err := http.ListenAndServe(addr, mux); err != nil {
|
if err := http.ListenAndServe(addr, mux); err != nil {
|
||||||
logger.Error("server stopped", "err", err)
|
logger.Error("server stopped", "err", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
|
|||||||
@@ -1,41 +1,26 @@
|
|||||||
# Model routing chains — three-layer priority:
|
# Model selection — first entry per skill is used.
|
||||||
# 1. model param in MCP tool call (caller override — collapses to single entry, no escalation)
|
# Override per-call by passing model in the MCP tool args.
|
||||||
# 2. per-skill chain here
|
# Model names come from LiteLLM /v1/models (host/name format).
|
||||||
# 3. default_chain fallback
|
|
||||||
|
|
||||||
verifier: claude-sonnet-4-6 # fixed verifier for all local tiers
|
|
||||||
|
|
||||||
llama_swap_url: http://koala:8080 # for warm-state probing
|
|
||||||
|
|
||||||
default_chain:
|
default_chain:
|
||||||
- ollama/qwen3-coder-30b-tuned
|
- iguana/qwen3-coder-next
|
||||||
- claude-sonnet-4-6
|
|
||||||
|
|
||||||
skills:
|
skills:
|
||||||
tdd:
|
tdd:
|
||||||
chain:
|
chain:
|
||||||
- ollama/qwen3-coder-30b-tuned
|
- koala/qwen3-coder-30b
|
||||||
- claude-sonnet-4-6
|
|
||||||
review:
|
review:
|
||||||
chain:
|
chain:
|
||||||
- ollama/devstral-tuned
|
- iguana/devstral
|
||||||
- ollama/gemma4
|
|
||||||
- claude-sonnet-4-6
|
|
||||||
debug:
|
debug:
|
||||||
chain:
|
chain:
|
||||||
- ollama/deepseek-r1-tuned
|
- iguana/deepseek-r1-14b
|
||||||
- claude-sonnet-4-6
|
|
||||||
spec:
|
spec:
|
||||||
chain:
|
chain:
|
||||||
- ollama/phi4
|
- koala/phi4-14b
|
||||||
- ollama/gemma4
|
|
||||||
- claude-sonnet-4-6
|
|
||||||
- claude-opus-4-6
|
|
||||||
retrospective:
|
retrospective:
|
||||||
chain:
|
chain:
|
||||||
- ollama/qwen3-coder-30b-tuned
|
- iguana/qwen3-coder-next
|
||||||
- claude-sonnet-4-6
|
|
||||||
trainer:
|
trainer:
|
||||||
chain:
|
chain:
|
||||||
- ollama/qwen3-coder-30b-tuned
|
- iguana/qwen3-coder-next
|
||||||
- claude-sonnet-4-6
|
|
||||||
|
|||||||
@@ -1,27 +1,31 @@
|
|||||||
# The Hyperguild Way
|
# Hyperguild Skill Protocols
|
||||||
|
|
||||||
These protocols are injected into every worker invocation. They define how you behave as a member of the hyperguild.
|
**IMPORTANT: DO NOT OUTPUT JSON. DO NOT USE JSON CODE BLOCKS.**
|
||||||
|
Your response must be plain markdown text. No `{"status":...}`, no ` ```json `, nothing.
|
||||||
|
If you output JSON you will be ignored. Respond in prose and markdown only.
|
||||||
|
|
||||||
## Output contract
|
---
|
||||||
|
|
||||||
Every response is raw JSON matching the response schema. No preamble, no prose, no markdown. Malformed output is treated as a failed invocation.
|
## Role
|
||||||
|
|
||||||
## Quality gate
|
You are a consultant. You analyse, suggest, and explain.
|
||||||
|
Claude Code has the tools to read files, run commands, and write code.
|
||||||
|
You provide the thinking; Claude Code provides the action.
|
||||||
|
|
||||||
`verified: true` only when a subprocess exit code confirms the outcome. Never self-assess. "I think the tests pass" is not verified.
|
## Output
|
||||||
|
|
||||||
## Escalation
|
Write in clear markdown. Lead with the key finding. Use headers and bullet lists
|
||||||
|
where they help. Be concise — Claude Code reads your full response.
|
||||||
|
|
||||||
If stuck after 3 attempts, return `status: error` with a clear `message` explaining why. Do not retry silently. Do not fabricate a passing result.
|
Do not make up file contents, test results, or command output you have not seen.
|
||||||
|
If you lack context to give a useful answer, say so and state what you need.
|
||||||
|
|
||||||
## Working offline
|
## Context blocks
|
||||||
|
|
||||||
If brain context is absent from your prompt, proceed using your discipline file only. Note the gap in your `message` field: "no brain context available".
|
You may receive one or both of these blocks before your task:
|
||||||
|
|
||||||
## Handoff format
|
**`## Relevant knowledge`** — patterns and decisions from past sessions. Let them
|
||||||
|
inform your approach. Do not contradict them without reason.
|
||||||
|
|
||||||
Structure your output so the next worker in a chain can consume it without transformation. Use the standard result schema. Do not add extra fields.
|
**`## Session history`** — what has already happened in this session. Build on it,
|
||||||
|
do not repeat it.
|
||||||
## Session logging
|
|
||||||
|
|
||||||
The Go skill handler records your invocation in the session log automatically. You do not need to do this yourself.
|
|
||||||
|
|||||||
@@ -1,40 +1,33 @@
|
|||||||
# Retrospective Worker Discipline
|
# Retrospective Discipline
|
||||||
|
|
||||||
You are the retrospective worker. Your job is to review a completed coding session and identify knowledge worth preserving in the hyperguild brain.
|
You review a completed coding session and identify knowledge worth preserving.
|
||||||
|
|
||||||
## What you receive
|
## What you receive
|
||||||
|
|
||||||
- A session log in JSON format listing every skill invocation: what was attempted, what failed, what passed, how long it took.
|
A session log in JSON format listing every skill invocation: what was attempted,
|
||||||
|
what failed, what passed, how long it took.
|
||||||
## What you produce
|
|
||||||
|
|
||||||
For each significant learning, call brain_write with a structured markdown note. Then return a JSON result summarising what you wrote.
|
|
||||||
|
|
||||||
## What is worth preserving
|
## What is worth preserving
|
||||||
|
|
||||||
- Patterns that worked and should be repeated
|
- Patterns that worked and should be repeated
|
||||||
- Failures that revealed something non-obvious about the codebase or the discipline
|
- Failures that revealed something non-obvious about the codebase or the approach
|
||||||
- Decisions made during the session (architectural, structural, tooling)
|
- Decisions made during the session (architectural, structural, tooling)
|
||||||
- Anything that contradicts or extends what the brain already knows
|
- Anything that contradicts or extends established patterns
|
||||||
|
|
||||||
## What is NOT worth preserving
|
## What is NOT worth preserving
|
||||||
|
|
||||||
- Routine TDD cycles with no surprises
|
- Routine cycles with no surprises
|
||||||
- Single-attempt passes with no interesting context
|
- Single-attempt passes with no interesting context
|
||||||
- Mechanical operations (file moves, renames, formatting)
|
- Mechanical operations (file moves, renames, formatting)
|
||||||
|
|
||||||
## Output format
|
## Output format
|
||||||
|
|
||||||
Return JSON matching the standard result schema:
|
Respond in markdown. For each learning worth preserving:
|
||||||
|
|
||||||
```json
|
**Learning:** One sentence describing what was learned.
|
||||||
{
|
**Context:** Why this session surfaced it — what made it non-obvious.
|
||||||
"status": "pass",
|
**Recommendation:** What should be done differently or repeated going forward.
|
||||||
"phase": "retrospective",
|
|
||||||
"skill": "retrospective",
|
|
||||||
"verified": true,
|
|
||||||
"message": "wrote N entries to brain/raw/"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
`verified` is true when you successfully called brain_write at least once and received a confirmation. If the session had nothing worth writing, return `verified: true` with `message: "no novel learnings in this session"`.
|
End with a summary: "N learnings worth writing to brain" or "No novel learnings in this session."
|
||||||
|
|
||||||
|
The caller will decide which learnings to write to the brain using brain_write.
|
||||||
|
|||||||
@@ -2,29 +2,24 @@
|
|||||||
|
|
||||||
You are a disciplined code reviewer. Read files carefully before commenting.
|
You are a disciplined code reviewer. Read files carefully before commenting.
|
||||||
|
|
||||||
## Iron laws
|
## Iron laws — any violation is a blocking issue
|
||||||
1. Never approve security vulnerabilities: command injection, SQL injection, credential exposure, path traversal, unchecked input at system boundaries
|
1. No security vulnerabilities: command injection, SQL injection, credential exposure, path traversal, unchecked input at system boundaries
|
||||||
2. Never approve silently swallowed errors — `err != nil` without wrapping or handling is always wrong
|
2. No silently swallowed errors — `err != nil` without wrapping or handling is always wrong
|
||||||
3. Never approve missing validation at system boundaries (user input, external APIs, file reads)
|
3. No missing validation at system boundaries (user input, external APIs, file reads)
|
||||||
|
|
||||||
## Output contract
|
## Output format
|
||||||
Return JSON result with:
|
|
||||||
- `status`: "pass" if no blocking issues; "fail" if any iron law is violated
|
Respond in markdown. Group findings by severity:
|
||||||
- `phase`: "review"
|
|
||||||
- `skill`: "review"
|
**CRITICAL:** Issues that violate an iron law or will cause data loss / security breach.
|
||||||
- `file_path`: first file reviewed
|
**WARNING:** Issues that will likely cause bugs or maintenance problems.
|
||||||
- `runner_output`: full review formatted as:
|
**SUGGESTION:** Style, clarity, or optional improvements.
|
||||||
```
|
|
||||||
CRITICAL: <issue> at <file>:<line>
|
For each finding include the file and line number. If nothing is wrong, explain specifically which iron law checks you ran and why they passed — never rubber-stamp.
|
||||||
WARNING: <issue> at <file>:<line>
|
|
||||||
SUGGESTION: <issue> at <file>:<line>
|
|
||||||
```
|
|
||||||
- `verified`: true if you read all specified files; false if any were missing or unreadable
|
|
||||||
- `message`: "N critical, M warnings, K suggestions" or "clean: <which iron law checks passed and why>"
|
|
||||||
|
|
||||||
## Rules
|
## Rules
|
||||||
1. Read every file listed before writing feedback
|
1. Read every file listed before writing feedback
|
||||||
2. Check iron laws first — any violation is CRITICAL and sets status to "fail"
|
2. Check iron laws first — if any are violated, flag them before anything else
|
||||||
3. Then check: correctness, test coverage for new code, Go style conventions
|
3. Then check: correctness, test coverage for new code, Go style conventions
|
||||||
4. Never rubber-stamp — if nothing is wrong, explain specifically which iron law checks you ran and why they passed
|
4. Line references required for every finding
|
||||||
5. Line references are required for every finding — "roughly around the middle" is not acceptable
|
5. End with a one-line summary: "N critical, M warnings, K suggestions" or "Clean — no issues found"
|
||||||
|
|||||||
@@ -7,40 +7,31 @@ You write structured implementation specs. Nothing is left ambiguous.
|
|||||||
2. Always include an explicit "Out of scope" section — if you don't draw the boundary, the developer will guess wrong
|
2. Always include an explicit "Out of scope" section — if you don't draw the boundary, the developer will guess wrong
|
||||||
3. Every technical decision in the approach must have a rationale
|
3. Every technical decision in the approach must have a rationale
|
||||||
|
|
||||||
## Output contract
|
## Output format
|
||||||
Return JSON result with:
|
|
||||||
- `status`: "pass" (spec written) or "error" (requirements too ambiguous to spec without more input)
|
|
||||||
- `phase`: "spec"
|
|
||||||
- `skill`: "spec"
|
|
||||||
- `file_path`: the output_path where the spec was written (absolute path)
|
|
||||||
- `runner_output`: ""
|
|
||||||
- `verified`: true if the file was written successfully
|
|
||||||
- `message`: "spec written: <one-line summary of what was specced>"
|
|
||||||
|
|
||||||
## Spec structure
|
Write the spec as markdown using this structure:
|
||||||
Write the spec as markdown to the output_path:
|
|
||||||
|
|
||||||
```markdown
|
```
|
||||||
# [Feature] Spec
|
# [Feature] Spec
|
||||||
|
|
||||||
## Problem statement
|
## Problem statement
|
||||||
[What problem does this solve? For whom? Why now?]
|
What problem does this solve? For whom? Why now?
|
||||||
|
|
||||||
## Success criteria
|
## Success criteria
|
||||||
- [ ] [Criterion 1 — measurable and verifiable]
|
- [ ] Criterion 1 — measurable and verifiable
|
||||||
- [ ] [Criterion 2 — measurable and verifiable]
|
- [ ] Criterion 2 — measurable and verifiable
|
||||||
|
|
||||||
## Constraints
|
## Constraints
|
||||||
[Non-negotiable requirements the solution must satisfy]
|
Non-negotiable requirements the solution must satisfy.
|
||||||
|
|
||||||
## Out of scope
|
## Out of scope
|
||||||
[What we are explicitly NOT doing in this iteration]
|
What we are explicitly NOT doing in this iteration.
|
||||||
|
|
||||||
## Technical approach
|
## Technical approach
|
||||||
[Architecture decisions, key components, rationale for each choice]
|
Architecture decisions, key components, rationale for each choice.
|
||||||
|
|
||||||
## Risks
|
## Risks
|
||||||
[What could go wrong, and how we'd mitigate it]
|
What could go wrong, and how we'd mitigate it.
|
||||||
```
|
```
|
||||||
|
|
||||||
If the requirements are too vague to produce measurable success criteria, return status "error" with a message listing the specific questions that need answers.
|
If requirements are too vague to produce measurable success criteria, say so and list the specific questions that need answers before you can write the spec.
|
||||||
|
|||||||
@@ -1,26 +1,35 @@
|
|||||||
# TDD Skill
|
# TDD Discipline
|
||||||
|
|
||||||
## Iron Law
|
## Iron Law
|
||||||
|
|
||||||
NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST.
|
NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST.
|
||||||
|
|
||||||
## Red phase
|
## Red phase — write a failing test
|
||||||
|
|
||||||
- Write exactly one test. One behavior. Name must describe the behavior clearly.
|
- Write exactly one test. One behavior. Name must describe the behavior clearly.
|
||||||
- Run the test suite. Confirm the test FAILS.
|
- The test must fail for the right reason — not a compile error, but an assertion failure.
|
||||||
- If the test passes immediately: it tests existing behavior or is vacuous.
|
|
||||||
Return status "fail" with message explaining why the test is wrong.
|
|
||||||
- Do not write any implementation code in this phase.
|
- Do not write any implementation code in this phase.
|
||||||
|
|
||||||
## Green phase
|
Respond with:
|
||||||
|
- The test code to write (file path + content)
|
||||||
|
- The exact failure you expect to see when running it
|
||||||
|
- Why that failure confirms the test is meaningful
|
||||||
|
|
||||||
|
## Green phase — make the test pass
|
||||||
|
|
||||||
- Write the minimal code to make the failing test pass. Nothing more.
|
- Write the minimal code to make the failing test pass. Nothing more.
|
||||||
- YAGNI: no extra parameters, no future-proofing, no clever abstractions.
|
- YAGNI: no extra parameters, no future-proofing, no clever abstractions.
|
||||||
- Run the test suite. Confirm it PASSES.
|
|
||||||
- If tests fail: fix the implementation, not the test. Max 3 attempts.
|
|
||||||
|
|
||||||
## Refactor phase
|
Respond with:
|
||||||
|
- The implementation code to write (file path + content)
|
||||||
|
- Confirmation of which test it targets and how it satisfies the assertion
|
||||||
|
|
||||||
|
## Refactor phase — improve without changing behavior
|
||||||
|
|
||||||
- Improve structure, naming, or clarity only. No new behavior.
|
- Improve structure, naming, or clarity only. No new behavior.
|
||||||
- Tests must remain green after every change.
|
- Tests must remain green after every change.
|
||||||
- If tests break during refactor: revert that change, return status "fail".
|
|
||||||
|
Respond with:
|
||||||
|
- Specific refactoring suggestions with rationale
|
||||||
|
- Which files to touch and what to change
|
||||||
|
- Any risks that could break existing tests
|
||||||
|
|||||||
@@ -1,31 +1,26 @@
|
|||||||
# Trainer Reader Discipline
|
# Trainer Reader Discipline
|
||||||
|
|
||||||
You scan session logs and identify candidate learning moments worth converting to training data.
|
You scan session logs and identify candidate learning moments worth preserving in the brain.
|
||||||
|
|
||||||
## What to look for
|
## What to look for
|
||||||
- **SFT candidates**: the worker did exactly the right thing — a clean pattern worth reinforcing
|
|
||||||
- **DPO candidates**: the worker first produced a wrong or suboptimal response, then corrected — you have both rejected and chosen
|
- **Patterns that worked**: the approach was clean and correct — worth reinforcing
|
||||||
|
- **Corrections**: something was first done wrong, then corrected — both sides are valuable
|
||||||
|
|
||||||
## Scoring (1–5)
|
## Scoring (1–5)
|
||||||
|
|
||||||
- 5: novel pattern, clearly correct, generalises across projects
|
- 5: novel pattern, clearly correct, generalises across projects
|
||||||
- 4: good pattern, correct, somewhat project-specific but still useful
|
- 4: good pattern, correct, somewhat project-specific but still useful
|
||||||
- 3: correct but obvious — include only if especially clean
|
- 3: correct but obvious — include only if especially clean
|
||||||
- 2 or below: skip — too ambiguous or too context-specific
|
- 2 or below: skip
|
||||||
|
|
||||||
## Output contract
|
## Output format
|
||||||
Return JSON result with:
|
|
||||||
- `status`: "pass" or "error"
|
|
||||||
- `phase`: "trainer"
|
|
||||||
- `skill`: "trainer"
|
|
||||||
- `file_path`: ""
|
|
||||||
- `runner_output`: JSON array of candidates (valid JSON, not markdown):
|
|
||||||
[{"type":"sft","moment":"<what happened>","prompt":"<what was asked>","completion":"<what was done right>","score":4},
|
|
||||||
{"type":"dpo","moment":"<what happened>","prompt":"<what was asked>","chosen":"<correct>","rejected":"<incorrect>","score":3}]
|
|
||||||
- `verified`: true
|
|
||||||
- `message`: "N sft candidates, M dpo candidates found"
|
|
||||||
|
|
||||||
## Rules
|
Respond in markdown. List each candidate:
|
||||||
1. Read all session entries in the task prompt
|
|
||||||
2. Score each entry — only include entries scoring >= 3
|
**Candidate N (score: X/5, type: pattern|correction)**
|
||||||
3. Prompt/completion fields must be phrased to generalise: no project-specific paths or names
|
- **What happened:** Brief description of the learning moment
|
||||||
4. If no candidates score >= 3, return an empty array `[]` — never force low-quality candidates
|
- **Why it's valuable:** What makes this worth preserving
|
||||||
|
- **Key insight:** The distilled lesson in one sentence
|
||||||
|
|
||||||
|
End with: "N candidates found (M scoring ≥ 3)" — the writer will use these to produce knowledge entries.
|
||||||
|
|||||||
@@ -1,35 +1,31 @@
|
|||||||
# Trainer Writer Discipline
|
# Trainer Writer Discipline
|
||||||
|
|
||||||
You receive candidate learning moments from the reader and write clean SFT/DPO training pairs.
|
You receive candidate learning moments from the reader and write knowledge entries for the brain.
|
||||||
|
|
||||||
## Quality gate (apply before writing)
|
## Quality gate (apply before writing each entry)
|
||||||
- SFT: prompt must be phrased so it could come from any project, not just this one
|
|
||||||
- DPO: chosen and rejected must be clearly distinguishable — skip if a reader can't tell which is better
|
|
||||||
- Never include project-specific paths, variable names, or identifiers in any pair
|
|
||||||
|
|
||||||
## Output contract
|
- The lesson must be phrased so it could apply to any project, not just this one
|
||||||
Return JSON result with:
|
- No project-specific paths, variable names, or identifiers
|
||||||
- `status`: "pass" (pairs written or skipped due to quality) or "error" (candidates JSON was malformed)
|
- The insight must be stated clearly enough that someone reading it cold would understand it
|
||||||
- `phase`: "trainer"
|
|
||||||
- `skill`: "trainer"
|
|
||||||
- `file_path`: path of the last file written (empty if nothing passed quality gate)
|
|
||||||
- `runner_output`: "N SFT pairs written to brain/training-data/sft/, M DPO pairs to brain/training-data/dpo/" or "0 pairs passed quality gate"
|
|
||||||
- `verified`: true if files were written; false if nothing passed
|
|
||||||
- `message`: "N sft + M dpo pairs for session <id>" or "no pairs passed quality gate"
|
|
||||||
|
|
||||||
## File format
|
## Output format
|
||||||
JSONL — one JSON object per line.
|
|
||||||
|
|
||||||
SFT: `{"prompt": "...", "completion": "..."}`
|
For each candidate that passes the quality gate, write a knowledge entry in this format:
|
||||||
DPO: `{"prompt": "...", "chosen": "...", "rejected": "..."}`
|
|
||||||
|
|
||||||
Write SFT to: `<brain_dir>/training-data/sft/<session_id>.jsonl`
|
```
|
||||||
Write DPO to: `<brain_dir>/training-data/dpo/<session_id>.jsonl`
|
# [Topic]
|
||||||
|
|
||||||
Append to existing files if they exist (don't overwrite).
|
## Lesson
|
||||||
|
[The key insight in 1-3 sentences]
|
||||||
|
|
||||||
## Rules
|
## When it applies
|
||||||
1. Parse the `reader_candidates` JSON from the task prompt
|
[Conditions under which this pattern is relevant]
|
||||||
2. For each candidate: apply quality gate
|
|
||||||
3. Write passing SFT candidates to sft JSONL, DPO candidates to dpo JSONL
|
## Example
|
||||||
4. If nothing passes, return status "pass" with verified: false and message "no pairs passed quality gate"
|
[A brief, generic example that illustrates the lesson]
|
||||||
|
```
|
||||||
|
|
||||||
|
After presenting all entries, end with a summary:
|
||||||
|
"N entries ready for brain_write" or "0 entries passed quality gate — [reason]"
|
||||||
|
|
||||||
|
The caller will write passing entries to the brain using brain_write.
|
||||||
|
|||||||
241
docs/multi-model-routing.md
Normal file
241
docs/multi-model-routing.md
Normal file
@@ -0,0 +1,241 @@
|
|||||||
|
# Multi-Model Routing for supervisor
|
||||||
|
|
||||||
|
Reference document for implementing multi-model access within the supervisor project.
|
||||||
|
Researched April 2026. Constraints: Claude Max subscription (ToS must be respected).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
|
||||||
|
Route tasks to specialized, cheaper, or local models during agent and skill flows — without
|
||||||
|
violating Anthropic's terms or introducing unnecessary infrastructure risk.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Hard Constraints
|
||||||
|
|
||||||
|
- Claude Max subscription is in use. Anthropic's April 2026 terms **prohibit using the
|
||||||
|
subscription with third-party harnesses that spoof the Anthropic API surface**.
|
||||||
|
- `ANTHROPIC_BASE_URL` → LiteLLM workaround is explicitly out of scope.
|
||||||
|
- Claude must remain the reasoning engine. Other models are tools, not replacements.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Infrastructure Available
|
||||||
|
|
||||||
|
| Machine | Role | Relevant services |
|
||||||
|
|---------|------|-------------------|
|
||||||
|
| koala | GPU inference | llama-swap, Ollama, Qdrant, LiteLLM proxy |
|
||||||
|
| iguana | Services, builds | k3s, general services |
|
||||||
|
| flamingo | Daily driver | Claude Code runs here |
|
||||||
|
|
||||||
|
LiteLLM proxy on koala exposes 100+ models (local + cloud) through a unified API.
|
||||||
|
All machines connected via Tailscale.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Approved Patterns
|
||||||
|
|
||||||
|
### Pattern 1 — Native Claude model tiering (zero build)
|
||||||
|
|
||||||
|
Claude Code subagents support per-agent model selection via frontmatter.
|
||||||
|
Use this for cost routing within the Claude model family.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# ~/.claude/agents/explorer.md
|
||||||
|
---
|
||||||
|
name: explorer
|
||||||
|
description: File reading, code search, codebase mapping — use for all exploration tasks
|
||||||
|
model: haiku
|
||||||
|
---
|
||||||
|
```
|
||||||
|
|
||||||
|
- `haiku` for exploration, summarization, classification
|
||||||
|
- `sonnet` (default) for main reasoning and implementation
|
||||||
|
- `opus` for deep analysis, architecture decisions
|
||||||
|
|
||||||
|
**When to use**: Always. Add `model: haiku` to any subagent that does read-heavy or
|
||||||
|
classification work. Cheapest and fastest path to cost control.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Pattern 2 — MCP tools wrapping local models (primary build target)
|
||||||
|
|
||||||
|
Expose local models on koala as named MCP tools. Claude remains the orchestrator and
|
||||||
|
reasoning engine — it calls local models as tools the same way it calls any other tool.
|
||||||
|
|
||||||
|
This is the intended MCP use case and carries zero ToS risk.
|
||||||
|
|
||||||
|
**Semantic contract**: Claude decides *when* to delegate based on the tool description.
|
||||||
|
Write descriptions that tell Claude what the model is good for.
|
||||||
|
|
||||||
|
#### MCP server implementation
|
||||||
|
|
||||||
|
Small Python server, launched over stdio by Claude Code on flamingo (it reaches the models on koala over Tailscale), registered in Claude Code settings.
|
||||||
|
|
||||||
|
```python
|
||||||
|
# supervisor/scripts/mcp_local_models.py
|
||||||
|
import mcp
|
||||||
|
import requests
|
||||||
|
|
||||||
|
server = mcp.Server("local-models")
|
||||||
|
|
||||||
|
LITELLM_BASE = "http://koala:4000"
|
||||||
|
OLLAMA_BASE = "http://koala:11434"
|
||||||
|
|
||||||
|
def _litellm_chat(model: str, prompt: str) -> str:
|
||||||
|
r = requests.post(f"{LITELLM_BASE}/v1/chat/completions", json={
|
||||||
|
"model": model,
|
||||||
|
"messages": [{"role": "user", "content": prompt}],
|
||||||
|
"max_tokens": 2048,
|
||||||
|
})
|
||||||
|
r.raise_for_status()
|
||||||
|
return r.json()["choices"][0]["message"]["content"]
|
||||||
|
|
||||||
|
|
||||||
|
@server.tool()
|
||||||
|
def ask_local_llama(prompt: str) -> str:
|
||||||
|
"""Ask the local Llama model on koala.
|
||||||
|
Use for: bulk summarization, first-pass analysis, classification, simple Q&A,
|
||||||
|
anything that does not require deep reasoning or up-to-date knowledge.
|
||||||
|
Faster and cheaper than cloud models for routine subtasks."""
|
||||||
|
return _litellm_chat("llama3-local", prompt)
|
||||||
|
|
||||||
|
|
||||||
|
@server.tool()
|
||||||
|
def ask_coding_model(code: str, question: str) -> str:
|
||||||
|
"""Ask a code-specialized local model.
|
||||||
|
Use for: syntax checking, boilerplate generation, code formatting questions,
|
||||||
|
simple refactors where pattern-matching is sufficient."""
|
||||||
|
return _litellm_chat("codellama-local", f"Code:\n{code}\n\nQuestion: {question}")
|
||||||
|
|
||||||
|
|
||||||
|
@server.tool()
|
||||||
|
def list_available_local_models() -> list[str]:
|
||||||
|
"""List all models currently available on the local LiteLLM proxy."""
|
||||||
|
r = requests.get(f"{LITELLM_BASE}/v1/models")
|
||||||
|
r.raise_for_status()
|
||||||
|
return [m["id"] for m in r.json()["data"]]
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
mcp.run_stdio_server(server)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Register in Claude Code
|
||||||
|
|
||||||
|
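Add to the project-level `.mcp.json` (or register it user-wide with `claude mcp add --scope user`, which writes to `~/.claude.json`) — Claude Code does not read `mcpServers` from `settings.json`: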
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"mcpServers": {
|
||||||
|
"local-models": {
|
||||||
|
"command": "python3",
|
||||||
|
"args": ["/path/to/supervisor/scripts/mcp_local_models.py"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### LiteLLM config additions needed on koala
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# litellm config.yaml — add model entries for local models
|
||||||
|
model_list:
|
||||||
|
- model_name: llama3-local
|
||||||
|
litellm_params:
|
||||||
|
model: ollama/llama3.2
|
||||||
|
api_base: http://localhost:11434
|
||||||
|
|
||||||
|
- model_name: codellama-local
|
||||||
|
litellm_params:
|
||||||
|
model: ollama/codellama
|
||||||
|
api_base: http://localhost:11434
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Pattern 3 — External orchestration scripts (for pipeline workflows)
|
||||||
|
|
||||||
|
For multi-model pipelines that don't need to live inside a Claude Code session.
|
||||||
|
These scripts use their own API key (separate from Max subscription — API billing),
|
||||||
|
so they can call Claude API + LiteLLM freely.
|
||||||
|
|
||||||
|
Claude Code invokes them via the Bash tool.
|
||||||
|
|
||||||
|
```
|
||||||
|
Claude Code → [Bash tool] → ./scripts/orchestrate.py → {Claude API, LiteLLM, local models}
|
||||||
|
```
|
||||||
|
|
||||||
|
```python
|
||||||
|
# supervisor/scripts/orchestrate.py
|
||||||
|
import anthropic
|
||||||
|
import requests
|
||||||
|
|
||||||
|
claude = anthropic.Anthropic() # reads ANTHROPIC_API_KEY — separate from Max subscription
|
||||||
|
|
||||||
|
def analyze_document(path: str) -> str:
|
||||||
|
with open(path) as f:
|
||||||
|
content = f.read()
|
||||||
|
|
||||||
|
# Step 1: local Llama extracts structure (fast, cheap)
|
||||||
|
structure = requests.post("http://koala:4000/v1/chat/completions", json={
|
||||||
|
"model": "llama3-local",
|
||||||
|
"messages": [{"role": "user", "content": f"Extract key sections from:\n{content}"}],
|
||||||
|
}).json()["choices"][0]["message"]["content"]
|
||||||
|
|
||||||
|
# Step 2: Claude synthesizes and reasons over it
|
||||||
|
synthesis = claude.messages.create(
|
||||||
|
model="claude-sonnet-4-6",
|
||||||
|
max_tokens=2048,
|
||||||
|
messages=[{"role": "user", "content": f"Synthesize these findings:\n{structure}"}]
|
||||||
|
)
|
||||||
|
return synthesis.content[0].text
|
||||||
|
```
|
||||||
|
|
||||||
|
**When to use**: Batch processing, automated pipelines, workflows triggered by cron or
|
||||||
|
external events. Not for interactive Claude Code sessions.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What to Skip
|
||||||
|
|
||||||
|
| Approach | Why skip |
|
||||||
|
|----------|----------|
|
||||||
|
| `ANTHROPIC_BASE_URL` → LiteLLM | ToS violation with Max subscription (April 2026 terms) |
|
||||||
|
| Third-party harnesses (OpenClaw etc.) | Explicitly banned for subscription users |
|
||||||
|
| A2A in Claude Code | Not implemented by Anthropic yet — revisit late 2026 |
|
||||||
|
| OpenAI agent handoffs | Loses execution context, not worth the complexity |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Protocol Landscape (for awareness, not immediate action)
|
||||||
|
|
||||||
|
- **MCP** — production, 97M monthly downloads, your primary tool-access protocol. LiteLLM
|
||||||
|
natively supports it as both MCP gateway and MCP client as of v1.60+.
|
||||||
|
- **A2A v1.0** — Google/Linux Foundation, 150+ orgs in production, but Anthropic has not
|
||||||
|
shipped it in Claude Code. The intent is agent-to-agent peer delegation (vs MCP's
|
||||||
|
agent-to-tool). Worth watching for H2 2026.
|
||||||
|
- **AGNTCY** — Cisco/Linux Foundation, discovery and identity layer beneath MCP+A2A.
|
||||||
|
Potentially relevant for multi-machine routing across koala/iguana/flamingo once mature.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Build Priority
|
||||||
|
|
||||||
|
| Step | Effort | Value | When |
|
||||||
|
|------|--------|-------|------|
|
||||||
|
| Add `model: haiku` to explorer subagents | 10 min | Immediate cost saving | Now |
|
||||||
|
| Write MCP server for local models | 2–3h | Local model access in sessions | Soon |
|
||||||
|
| Register MCP server in Claude Code settings | 15 min | Activates pattern 2 | With above |
|
||||||
|
| Write orchestration script template | 1–2h | Pipeline workflows | When needed |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## References
|
||||||
|
|
||||||
|
- LiteLLM MCP docs: https://docs.litellm.ai/docs/mcp
|
||||||
|
- Community MCP wrapper for LiteLLM: https://github.com/itsDarianNgo/mcp-server-litellm
|
||||||
|
- Ollama MCP server: https://github.com/rawveg/ollama-mcp
|
||||||
|
- A2A protocol status: https://www.linuxfoundation.org/press/a2a-protocol-surpasses-150-organizations-lands-in-major-cloud-platforms-and-sees-enterprise-production-use-in-first-year
|
||||||
|
- AGNTCY: https://github.com/agntcy
|
||||||
2138
docs/superpowers/plans/2026-04-17-hyperguild-phase1.md
Normal file
2138
docs/superpowers/plans/2026-04-17-hyperguild-phase1.md
Normal file
File diff suppressed because it is too large
Load Diff
1871
docs/superpowers/plans/2026-04-19-hyperguild-phase2.md
Normal file
1871
docs/superpowers/plans/2026-04-19-hyperguild-phase2.md
Normal file
File diff suppressed because it is too large
Load Diff
1617
docs/superpowers/plans/2026-04-20-model-orchestration-plan.md
Normal file
1617
docs/superpowers/plans/2026-04-20-model-orchestration-plan.md
Normal file
File diff suppressed because it is too large
Load Diff
2608
docs/superpowers/plans/2026-04-22-brain-ingestion-pipeline.md
Normal file
2608
docs/superpowers/plans/2026-04-22-brain-ingestion-pipeline.md
Normal file
File diff suppressed because it is too large
Load Diff
858
docs/superpowers/plans/2026-04-22-brain-ingestion-quality.md
Normal file
858
docs/superpowers/plans/2026-04-22-brain-ingestion-quality.md
Normal file
@@ -0,0 +1,858 @@
|
|||||||
|
# Brain Ingestion Quality: PDF Extraction + Entity Resolution
|
||||||
|
|
||||||
|
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||||
|
|
||||||
|
**Goal:** Fix PDF ingestion (currently passes raw bytes to LLM) and add fuzzy entity resolution (prevents slug proliferation at scale).
|
||||||
|
|
||||||
|
**Architecture:** Two independent improvements wired into the existing pipeline. A new `extract` package handles text extraction by file type (pdftotext subprocess, passthrough for .md/.txt). A new `resolve.go` in the `pipeline` package normalizes proposed entity/concept titles against the loaded inventory to reuse existing slugs instead of creating duplicates. Extraction is wired into `watcher.go` and `api/handler.go`; resolution is wired into `pipeline.go` (called after `ParsePages`). No new dependencies except `poppler-utils` in the Docker image.
|
||||||
|
|
||||||
|
**Tech Stack:** Go stdlib (`os/exec`, `bufio`, `strings`), testify, poppler-utils (`pdftotext`)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## File Structure
|
||||||
|
|
||||||
|
**New files:**
|
||||||
|
- `ingestion/internal/extract/extract.go` — `Text(path string) (string, error)` dispatcher
|
||||||
|
- `ingestion/internal/extract/pdf.go` — `pdftotext` subprocess extraction
|
||||||
|
- `ingestion/internal/extract/extract_test.go` — table-driven tests for all paths
|
||||||
|
- `ingestion/internal/pipeline/resolve.go` — `Resolve(proposed []wiki.Page, inventory map[wiki.PageType][]wiki.Entry) []wiki.Page`
|
||||||
|
- `ingestion/internal/pipeline/resolve_test.go` — table-driven tests
|
||||||
|
|
||||||
|
**Modified files:**
|
||||||
|
- `ingestion/internal/wiki/types.go` — add `Aliases []string` to `Entry`
|
||||||
|
- `ingestion/internal/wiki/inventory.go` — `readFrontmatter` reads both title and aliases
|
||||||
|
- `ingestion/internal/wiki/inventory_test.go` — add alias coverage
|
||||||
|
- `ingestion/internal/pipeline/pipeline.go` — call `Resolve` after `ParsePages`
|
||||||
|
- `ingestion/internal/watcher/watcher.go` — call `extract.Text` instead of `os.ReadFile`
|
||||||
|
- `ingestion/internal/api/handler.go` — call `extract.Text` for path-based ingestion
|
||||||
|
- `ingestion/Dockerfile` — `apk add poppler-utils`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 1: `extract` package — Text() dispatcher with .md/.txt passthrough
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `ingestion/internal/extract/extract.go`
|
||||||
|
- Create: `ingestion/internal/extract/extract_test.go`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Write the failing test**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// ingestion/internal/extract/extract_test.go
|
||||||
|
package extract
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestText_Markdown(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "note.md")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("# Hello\n\nWorld."), 0o644))
|
||||||
|
|
||||||
|
got, err := Text(path)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "# Hello\n\nWorld.", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestText_Txt(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "note.txt")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("plain text"), 0o644))
|
||||||
|
|
||||||
|
got, err := Text(path)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "plain text", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestText_UnsupportedExtension(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "data.csv")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("a,b,c"), 0o644))
|
||||||
|
|
||||||
|
_, err := Text(path)
|
||||||
|
assert.ErrorContains(t, err, "unsupported")
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Run to verify it fails**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/extract/... -v
|
||||||
|
```
|
||||||
|
Expected: compile error — `undefined: Text` (the test file exists, but `extract.go` has not been written yet).
|
||||||
|
|
||||||
|
- [ ] **Step 3: Implement extract.go**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// ingestion/internal/extract/extract.go
|
||||||
|
package extract
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Text reads the file at path and returns its plain-text content.
|
||||||
|
// Supported extensions: .md, .txt (passthrough), .pdf (via pdftotext).
|
||||||
|
func Text(path string) (string, error) {
|
||||||
|
ext := strings.ToLower(fileExt(path))
|
||||||
|
switch ext {
|
||||||
|
case ".md", ".txt":
|
||||||
|
b, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("read %s: %w", path, err)
|
||||||
|
}
|
||||||
|
return string(b), nil
|
||||||
|
case ".pdf":
|
||||||
|
return extractPDF(path)
|
||||||
|
default:
|
||||||
|
return "", fmt.Errorf("unsupported file extension: %s", ext)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fileExt returns the file extension including the dot, with its case preserved (callers lowercase it); it returns "" when the final path element has no dot.
|
||||||
|
func fileExt(path string) string {
|
||||||
|
for i := len(path) - 1; i >= 0; i-- {
|
||||||
|
if path[i] == '.' {
|
||||||
|
return path[i:]
|
||||||
|
}
|
||||||
|
if path[i] == '/' || path[i] == '\\' {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 4: Add pdf.go stub so it compiles**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// ingestion/internal/extract/pdf.go
|
||||||
|
package extract
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
// extractPDF is a placeholder so the package compiles before PDF support is
// implemented. It ignores its argument and always reports an error.
func extractPDF(_ string) (string, error) {
	err := fmt.Errorf("PDF extraction not implemented")
	return "", err
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 5: Run tests to verify they pass**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/extract/... -v
|
||||||
|
```
|
||||||
|
Expected: PASS — 3 tests passing.
|
||||||
|
|
||||||
|
- [ ] **Step 6: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && git add internal/extract/
|
||||||
|
git commit -m "feat(extract): add Text() dispatcher with md/txt passthrough"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 2: PDF extraction via pdftotext
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `ingestion/internal/extract/pdf.go`
|
||||||
|
- Modify: `ingestion/internal/extract/extract_test.go`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Add PDF test (skip if pdftotext absent)**
|
||||||
|
|
||||||
|
Append to `extract_test.go`:
|
||||||
|
|
||||||
|
```go
|
||||||
|
func TestText_PDF(t *testing.T) {
|
||||||
|
if _, err := exec.LookPath("pdftotext"); err != nil {
|
||||||
|
t.Skip("pdftotext not available")
|
||||||
|
}
|
||||||
|
// Use a known PDF fixture; if none, create a minimal one via echo.
|
||||||
|
// The test verifies the round-trip: a PDF containing "Hello PDF" yields that string.
|
||||||
|
dir := t.TempDir()
|
||||||
|
pdfPath := filepath.Join(dir, "test.pdf")
|
||||||
|
|
||||||
|
// Generate a minimal single-page PDF using a here-doc approach.
|
||||||
|
// This is a valid minimal PDF containing the text "Hello PDF".
|
||||||
|
minimalPDF := "%PDF-1.4\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj\n" +
|
||||||
|
"2 0 obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj\n" +
|
||||||
|
"3 0 obj<</Type/Page/MediaBox[0 0 612 792]/Parent 2 0 R/Contents 4 0 R/Resources<</Font<</F1<</Type/Font/Subtype/Type1/BaseFont/Helvetica>>>>>>>>endobj\n" +
|
||||||
|
"4 0 obj<</Length 44>>\nstream\nBT /F1 12 Tf 100 700 Td (Hello PDF) Tj ET\nendstream\nendobj\n" +
|
||||||
|
"xref\n0 5\n0000000000 65535 f\n0000000009 00000 n\n0000000058 00000 n\n0000000115 00000 n\n0000000310 00000 n\n" +
|
||||||
|
"trailer<</Size 5/Root 1 0 R>>\nstartxref\n406\n%%EOF\n"
|
||||||
|
require.NoError(t, os.WriteFile(pdfPath, []byte(minimalPDF), 0o644))
|
||||||
|
|
||||||
|
got, err := Text(pdfPath)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, got, "Hello PDF")
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Add `"os/exec"` to imports in `extract_test.go`.
|
||||||
|
|
||||||
|
- [ ] **Step 2: Run to verify it fails (or skips)**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/extract/... -v -run TestText_PDF
|
||||||
|
```
|
||||||
|
Expected: SKIP (pdftotext not installed locally) or FAIL with "not implemented".
|
||||||
|
|
||||||
|
- [ ] **Step 3: Implement pdf.go**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// ingestion/internal/extract/pdf.go
|
||||||
|
package extract
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"os/exec"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// extractPDF runs the external pdftotext tool on path and returns the
// extracted text with surrounding whitespace trimmed.
//
// pdftotext must be installed and on PATH (package: poppler-utils on
// Alpine/Debian, poppler on Homebrew). It is invoked with -q and with "-" as
// the output file so the text is written to stdout.
//
// On failure the returned error wraps the underlying error from os/exec, so
// callers can use errors.Is(err, exec.ErrNotFound) to detect a missing binary
// or errors.As with *exec.ExitError to inspect the exit status. Anything the
// tool printed on stderr is included in the message.
func extractPDF(path string) (string, error) {
	cmd := exec.Command("pdftotext", "-q", path, "-")
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		// Prefer the tool's own diagnostics when it produced any, but always
		// keep the original error in the chain via %w.
		if msg := strings.TrimSpace(stderr.String()); msg != "" {
			return "", fmt.Errorf("pdftotext: %s: %w", msg, err)
		}
		return "", fmt.Errorf("pdftotext: %w", err)
	}

	return strings.TrimSpace(stdout.String()), nil
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 4: Run all extract tests**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/extract/... -v
|
||||||
|
```
|
||||||
|
Expected: PASS (PDF test skips if pdftotext absent, passes if present).
|
||||||
|
|
||||||
|
- [ ] **Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && git add internal/extract/pdf.go internal/extract/extract_test.go
|
||||||
|
git commit -m "feat(extract): implement PDF extraction via pdftotext"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 3: `Entry.Aliases` + inventory reads aliases from frontmatter
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `ingestion/internal/wiki/types.go`
|
||||||
|
- Modify: `ingestion/internal/wiki/inventory.go`
|
||||||
|
- Modify: `ingestion/internal/wiki/inventory_test.go`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Write failing test for alias loading**
|
||||||
|
|
||||||
|
Add to `inventory_test.go`:
|
||||||
|
|
||||||
|
```go
|
||||||
|
func TestLoadInventory_ReadsAliases(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "entities"), 0o755))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "sources"), 0o755))
|
||||||
|
|
||||||
|
require.NoError(t, os.WriteFile(
|
||||||
|
filepath.Join(dir, "wiki", "entities", "ryan-singer.md"),
|
||||||
|
[]byte("---\ntitle: Ryan Singer\naliases:\n - Singer\n - R. Singer\n---\n\n## Description\n\nDesigner.\n"),
|
||||||
|
0o644,
|
||||||
|
))
|
||||||
|
|
||||||
|
inv, err := LoadInventory(dir)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.Len(t, inv[PageTypeEntity], 1)
|
||||||
|
e := inv[PageTypeEntity][0]
|
||||||
|
assert.Equal(t, "Ryan Singer", e.Title)
|
||||||
|
assert.Equal(t, []string{"Singer", "R. Singer"}, e.Aliases)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Run to verify it fails**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/wiki/... -v -run TestLoadInventory_ReadsAliases
|
||||||
|
```
|
||||||
|
Expected: compile error — `Entry` has no `Aliases` field.
|
||||||
|
|
||||||
|
- [ ] **Step 3: Add Aliases to Entry in types.go**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// Entry is a summary of an existing wiki page used to build the inventory.
|
||||||
|
type Entry struct {
|
||||||
|
Slug string
|
||||||
|
Title string
|
||||||
|
Aliases []string
|
||||||
|
Type PageType
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 4: Replace readTitle with readFrontmatter in inventory.go**
|
||||||
|
|
||||||
|
Replace the `readTitle` function and its call site:
|
||||||
|
|
||||||
|
```go
|
||||||
|
// readFrontmatter extracts the title and aliases from the YAML frontmatter of
// the page at path.
//
// This is a deliberately small line-based reader, not a YAML parser. It
// understands:
//
//	---
//	title: Some Title
//	aliases:
//	  - First
//	  - "Second"
//	---
//
// as well as the inline form `aliases: [First, "Second"]` (items are split on
// commas, so an alias containing a comma is not supported in that form).
// Surrounding single or double quotes are stripped from the title and from
// every alias; empty aliases are dropped.
//
// The frontmatter must open on the first non-blank line of the file; a `---`
// further down (for example a Markdown horizontal rule) is not treated as an
// opening delimiter.
//
// The function is best-effort: if the file cannot be opened, has no
// frontmatter, or has a missing or empty title, title is fallbackSlug. Read
// errors mid-file are ignored and whatever was collected so far is returned.
func readFrontmatter(path, fallbackSlug string) (title string, aliases []string) {
	title = fallbackSlug
	f, err := os.Open(path)
	if err != nil {
		return title, nil
	}
	defer f.Close()

	// unquote trims whitespace and then any surrounding quote characters.
	unquote := func(s string) string {
		return strings.Trim(strings.TrimSpace(s), `"'`)
	}

	scanner := bufio.NewScanner(f)
	inFM := false
	inAliases := false
	for scanner.Scan() {
		line := scanner.Text()
		trimmed := strings.TrimSpace(line)

		if !inFM {
			if trimmed == "" {
				continue // tolerate leading blank lines
			}
			if trimmed != "---" {
				// Content starts without a delimiter: no frontmatter at all.
				return title, nil
			}
			inFM = true
			continue
		}
		if trimmed == "---" {
			break // end of frontmatter
		}

		// Collect block-style list items following an "aliases:" key.
		if inAliases {
			if strings.HasPrefix(trimmed, "- ") {
				if alias := unquote(strings.TrimPrefix(trimmed, "- ")); alias != "" {
					aliases = append(aliases, alias)
				}
				continue
			}
			inAliases = false // any other line ends the alias block
		}

		key, val, ok := strings.Cut(line, ":")
		if !ok {
			continue
		}
		switch strings.TrimSpace(key) {
		case "title":
			// Keep the slug fallback when the title is present but empty.
			if t := unquote(val); t != "" {
				title = t
			}
		case "aliases":
			val = strings.TrimSpace(val)
			if strings.HasPrefix(val, "[") && strings.HasSuffix(val, "]") {
				// Inline flow sequence: aliases: [A, "B"]
				for _, item := range strings.Split(val[1:len(val)-1], ",") {
					if alias := unquote(item); alias != "" {
						aliases = append(aliases, alias)
					}
				}
			} else {
				inAliases = true
			}
		}
	}
	return title, aliases
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Update `LoadInventory` to use `readFrontmatter`:
|
||||||
|
|
||||||
|
```go
|
||||||
|
title, aliases := readFrontmatter(path, slug)
|
||||||
|
result[pt] = append(result[pt], Entry{Slug: slug, Title: title, Aliases: aliases, Type: pt})
|
||||||
|
```
|
||||||
|
|
||||||
|
Remove the old `readTitle` function entirely.
|
||||||
|
|
||||||
|
- [ ] **Step 5: Run all wiki tests**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/wiki/... -v
|
||||||
|
```
|
||||||
|
Expected: PASS — all existing tests plus new alias test.
|
||||||
|
|
||||||
|
- [ ] **Step 6: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && git add internal/wiki/types.go internal/wiki/inventory.go internal/wiki/inventory_test.go
|
||||||
|
git commit -m "feat(wiki): add Aliases to Entry and read from YAML frontmatter"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 4: Fuzzy entity resolution
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `ingestion/internal/pipeline/resolve.go`
|
||||||
|
- Create: `ingestion/internal/pipeline/resolve_test.go`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Write failing tests**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// ingestion/internal/pipeline/resolve_test.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestResolve_NoMatch(t *testing.T) {
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/entities/new-person.md", Content: "---\ntitle: New Person\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeEntity: {
|
||||||
|
{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: []string{"Singer"}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/entities/new-person.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_TitleMatchRedirectsSlug(t *testing.T) {
|
||||||
|
// Proposed slug differs from existing but title matches.
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/entities/ryan-singer-the-designer.md", Content: "---\ntitle: Ryan Singer\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeEntity: {
|
||||||
|
{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: nil},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/entities/ryan-singer.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_AliasMatchRedirectsSlug(t *testing.T) {
|
||||||
|
// Proposed title matches an existing alias.
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/entities/singer.md", Content: "---\ntitle: Singer\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeEntity: {
|
||||||
|
{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: []string{"Singer", "R. Singer"}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/entities/ryan-singer.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_NormalizationCaseAndArticles(t *testing.T) {
|
||||||
|
// "the shape up method" normalizes to "shape up method" which matches "Shape Up Method".
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/concepts/the-shape-up-method.md", Content: "---\ntitle: The Shape Up Method\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeConcept: {
|
||||||
|
{Slug: "shape-up-method", Title: "Shape Up Method", Aliases: nil},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/concepts/shape-up-method.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_OnlyMatchesSamePageType(t *testing.T) {
|
||||||
|
// A concept slug must not redirect to an entity with the same normalized name.
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/concepts/ryan-singer.md", Content: "---\ntitle: Ryan Singer\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeEntity: {
|
||||||
|
{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: nil},
|
||||||
|
},
|
||||||
|
wiki.PageTypeConcept: {},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
// Not redirected — different page type.
|
||||||
|
assert.Equal(t, "wiki/concepts/ryan-singer.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_EmptyInventory(t *testing.T) {
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/entities/first.md", Content: "---\ntitle: First\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Equal(t, proposed, got)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Run to verify it fails**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/pipeline/... -v -run TestResolve
|
||||||
|
```
|
||||||
|
Expected: compile error — `Resolve` not defined.
|
||||||
|
|
||||||
|
- [ ] **Step 3: Implement resolve.go**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// ingestion/internal/pipeline/resolve.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Resolve remaps proposed pages to existing slugs when a fuzzy title match is found.
|
||||||
|
// It only matches within the same page type (entities→entities, concepts→concepts).
|
||||||
|
// Pages with no inventory match are returned unchanged.
|
||||||
|
func Resolve(proposed []wiki.Page, inventory map[wiki.PageType][]wiki.Entry) []wiki.Page {
|
||||||
|
// Build normalized lookup: normalized_title → canonical slug, keyed by page type.
|
||||||
|
type key struct {
|
||||||
|
pt wiki.PageType
|
||||||
|
normalized string
|
||||||
|
}
|
||||||
|
lookup := make(map[key]string) // key → canonical slug
|
||||||
|
for pt, entries := range inventory {
|
||||||
|
for _, e := range entries {
|
||||||
|
k := key{pt: pt, normalized: normalizeTitle(e.Title)}
|
||||||
|
lookup[k] = e.Slug
|
||||||
|
for _, alias := range e.Aliases {
|
||||||
|
ak := key{pt: pt, normalized: normalizeTitle(alias)}
|
||||||
|
if _, exists := lookup[ak]; !exists {
|
||||||
|
lookup[ak] = e.Slug
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out := make([]wiki.Page, 0, len(proposed))
|
||||||
|
for _, page := range proposed {
|
||||||
|
pt := pageTypeFromPath(page.Path)
|
||||||
|
title := extractTitle(page.Content)
|
||||||
|
k := key{pt: pt, normalized: normalizeTitle(title)}
|
||||||
|
if canonicalSlug, ok := lookup[k]; ok {
|
||||||
|
// Redirect path to canonical slug.
|
||||||
|
dir := filepath.Dir(page.Path)
|
||||||
|
page.Path = dir + "/" + canonicalSlug + ".md"
|
||||||
|
}
|
||||||
|
out = append(out, page)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeTitle reduces a title to a canonical form for fuzzy comparison:
// it lowercases the input, collapses all runs of whitespace to single spaces,
// removes at most one leading English article ("the", "a", "an"), and treats
// hyphens as spaces.
//
//	"The Shape Up Method" → "shape up method"
//	"shape-up"            → "shape up"
//	"The A Team"          → "a team" (only the first article is removed)
//
// An empty or whitespace-only input yields "".
func normalizeTitle(s string) string {
	// Collapse whitespace first so the article check below also matches when
	// the article is followed by a tab or several spaces.
	s = strings.Join(strings.Fields(strings.ToLower(s)), " ")

	// Strip a single leading article; stop after the first hit so that a
	// title such as "The A Team" does not lose two words.
	for _, article := range []string{"the ", "a ", "an "} {
		if strings.HasPrefix(s, article) {
			s = s[len(article):]
			break
		}
	}

	// Hyphens count as word separators; re-collapse in case replacing them
	// introduced leading, trailing or doubled spaces.
	s = strings.ReplaceAll(s, "-", " ")
	return strings.Join(strings.Fields(s), " ")
}
|
||||||
|
|
||||||
|
// pageTypeFromPath extracts the wiki.PageType from a path like "wiki/entities/foo.md".
|
||||||
|
func pageTypeFromPath(path string) wiki.PageType {
|
||||||
|
parts := strings.Split(filepath.ToSlash(path), "/")
|
||||||
|
if len(parts) >= 2 {
|
||||||
|
return wiki.PageType(parts[1])
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractTitle returns the value of the `title` key from the YAML frontmatter
// at the top of content, with surrounding whitespace and single or double
// quotes removed.
//
// The frontmatter must open with a `---` line as the first non-blank line and
// is scanned line by line until the closing `---`; a `---` appearing later in
// a document that has no frontmatter (for example a Markdown horizontal rule)
// is ignored. It returns "" when there is no frontmatter or no title key.
func extractTitle(content string) string {
	inFM := false
	// Walk the content one line at a time without splitting the whole
	// document up front, so frontmatter of any length is handled correctly.
	for rest := content; rest != ""; {
		var line string
		line, rest, _ = strings.Cut(rest, "\n")
		trimmed := strings.TrimSpace(line)

		if !inFM {
			if trimmed == "" {
				continue // tolerate leading blank lines
			}
			if trimmed != "---" {
				return "" // document does not start with frontmatter
			}
			inFM = true
			continue
		}
		if trimmed == "---" {
			break // end of frontmatter
		}

		key, val, ok := strings.Cut(line, ":")
		if ok && strings.TrimSpace(key) == "title" {
			return strings.Trim(strings.TrimSpace(val), `"'`)
		}
	}
	return ""
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 4: Run resolve tests**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/pipeline/... -v -run TestResolve
|
||||||
|
```
|
||||||
|
Expected: PASS — 6 tests passing.
|
||||||
|
|
||||||
|
- [ ] **Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && git add internal/pipeline/resolve.go internal/pipeline/resolve_test.go
|
||||||
|
git commit -m "feat(pipeline): add fuzzy entity resolution to prevent slug proliferation"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 5: Wire Resolve into pipeline.Run
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `ingestion/internal/pipeline/pipeline.go`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Add Resolve call after ParsePages in Run()**
|
||||||
|
|
||||||
|
In `pipeline.go`, locate the loop that builds `allPages` via `allPages = append(allPages, pages...)`. Once that loop has finished, `allPages` holds the pages from every chunk. `Resolve` must run at that point — after all chunks have been collected but before `mergeAll` — against the inventory snapshot loaded at the start of the run.
|
||||||
|
|
||||||
|
Replace the `merged := mergeAll(allPages)` line with:
|
||||||
|
|
||||||
|
```go
|
||||||
|
resolved := Resolve(allPages, inventory)
|
||||||
|
merged := mergeAll(resolved)
|
||||||
|
```
|
||||||
|
|
||||||
|
The full relevant section of `Run` after this change:
|
||||||
|
|
||||||
|
```go
|
||||||
|
for _, chunk := range chunks {
|
||||||
|
userPrompt := BuildPrompt(schema, source, chunk, inventory)
|
||||||
|
output, err := cfg.Complete(ctx, systemPrompt, userPrompt)
|
||||||
|
if err != nil {
|
||||||
|
return Result{}, fmt.Errorf("LLM call: %w", err)
|
||||||
|
}
|
||||||
|
pages, warnings := ParsePages(output)
|
||||||
|
allPages = append(allPages, pages...)
|
||||||
|
allWarnings = append(allWarnings, warnings...)
|
||||||
|
}
|
||||||
|
|
||||||
|
resolved := Resolve(allPages, inventory)
|
||||||
|
merged := mergeAll(resolved)
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Run all pipeline tests**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/pipeline/... -v
|
||||||
|
```
|
||||||
|
Expected: PASS — all existing tests still pass (Resolve is a no-op when inventory is empty or no title matches).
|
||||||
|
|
||||||
|
- [ ] **Step 3: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && git add internal/pipeline/pipeline.go
|
||||||
|
git commit -m "feat(pipeline): resolve proposed pages against inventory before writing"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 6: Wire extract.Text into watcher and handler
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `ingestion/internal/watcher/watcher.go`
|
||||||
|
- Modify: `ingestion/internal/api/handler.go`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Update watcher.go**
|
||||||
|
|
||||||
|
In `processFile`, replace:
|
||||||
|
|
||||||
|
```go
|
||||||
|
content, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("read file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, runErr := pipeline.Run(ctx, cfg.Pipeline, cfg.BrainDir, string(content), source, false)
|
||||||
|
```
|
||||||
|
|
||||||
|
With:
|
||||||
|
|
||||||
|
```go
|
||||||
|
content, err := extract.Text(path)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("extract text: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, runErr := pipeline.Run(ctx, cfg.Pipeline, cfg.BrainDir, content, source, false)
|
||||||
|
```
|
||||||
|
|
||||||
|
Add import: `"github.com/mathiasbq/hyperguild/ingestion/internal/extract"`
|
||||||
|
|
||||||
|
Keep the `"os"` import: although `os.ReadFile` is gone, `os` is still used for `os.MkdirAll`, `os.WriteFile`, and `os.Stat`.
|
||||||
|
|
||||||
|
- [ ] **Step 2: Update handler.go — single-file path**
|
||||||
|
|
||||||
|
In `IngestPath`, the single-file branch reads:
|
||||||
|
|
||||||
|
```go
|
||||||
|
content, readErr := os.ReadFile(req.Path)
|
||||||
|
if readErr != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, fmt.Sprintf("read file: %v", readErr))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Replace with:
|
||||||
|
|
||||||
|
```go
|
||||||
|
content, readErr := extract.Text(req.Path)
|
||||||
|
if readErr != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, fmt.Sprintf("extract text: %v", readErr))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 3: Update handler.go — directory walk branch**
|
||||||
|
|
||||||
|
In `IngestPath`, the directory walk reads:
|
||||||
|
|
||||||
|
```go
|
||||||
|
content, readErr := os.ReadFile(path)
|
||||||
|
if readErr != nil {
|
||||||
|
allWarnings = append(allWarnings, fmt.Sprintf("read %s: %v", path, readErr))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
source := req.Source
|
||||||
|
if source == "" {
|
||||||
|
source = filepath.Base(path)
|
||||||
|
}
|
||||||
|
result, runErr := pipeline.Run(r.Context(), h.pipeline, h.brainDir, string(content), source, req.DryRun)
|
||||||
|
```
|
||||||
|
|
||||||
|
Replace with:
|
||||||
|
|
||||||
|
```go
|
||||||
|
content, readErr := extract.Text(path)
|
||||||
|
if readErr != nil {
|
||||||
|
allWarnings = append(allWarnings, fmt.Sprintf("extract %s: %v", path, readErr))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
source := req.Source
|
||||||
|
if source == "" {
|
||||||
|
source = filepath.Base(path)
|
||||||
|
}
|
||||||
|
result, runErr := pipeline.Run(r.Context(), h.pipeline, h.brainDir, content, source, req.DryRun)
|
||||||
|
```
|
||||||
|
|
||||||
|
Add import: `"github.com/mathiasbq/hyperguild/ingestion/internal/extract"` to handler.go.
|
||||||
|
|
||||||
|
- [ ] **Step 4: Build to verify no compile errors**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go build ./...
|
||||||
|
```
|
||||||
|
Expected: success, no errors.
|
||||||
|
|
||||||
|
- [ ] **Step 5: Run all tests**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./...
|
||||||
|
```
|
||||||
|
Expected: PASS — all tests pass (watcher tests use .md files, already covered by extract passthrough).
|
||||||
|
|
||||||
|
- [ ] **Step 6: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && git add internal/watcher/watcher.go internal/api/handler.go
|
||||||
|
git commit -m "feat(watcher,api): use extract.Text() for file reading — fixes PDF ingestion"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 7: Add poppler-utils to Dockerfile
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `ingestion/Dockerfile`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Add apk install for poppler-utils**
|
||||||
|
|
||||||
|
In `ingestion/Dockerfile`, add `poppler-utils` to the Alpine runtime stage. The current final stage is:
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
FROM alpine:3.21
|
||||||
|
|
||||||
|
COPY --from=builder /out/ingestion /usr/local/bin/ingestion
|
||||||
|
|
||||||
|
RUN addgroup -S ingestion && adduser -S -G ingestion ingestion
|
||||||
|
```
|
||||||
|
|
||||||
|
Replace with:
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
FROM alpine:3.21
|
||||||
|
|
||||||
|
RUN apk add --no-cache poppler-utils
|
||||||
|
|
||||||
|
COPY --from=builder /out/ingestion /usr/local/bin/ingestion
|
||||||
|
|
||||||
|
RUN addgroup -S ingestion && adduser -S -G ingestion ingestion
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Verify Dockerfile builds (local Docker)**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && docker build -t ingestion:test .
|
||||||
|
```
|
||||||
|
Expected: image builds successfully; `pdftotext` is available inside.
|
||||||
|
|
||||||
|
- [ ] **Step 3: Verify pdftotext is accessible in the image**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run --rm ingestion:test pdftotext -v
|
||||||
|
```
|
||||||
|
Expected: prints version string like `pdftotext version 24.x.x`.
|
||||||
|
|
||||||
|
- [ ] **Step 4: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && git add Dockerfile
|
||||||
|
git commit -m "chore(docker): add poppler-utils for PDF text extraction"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Self-Review
|
||||||
|
|
||||||
|
**Spec coverage check:**
|
||||||
|
|
||||||
|
| Requirement | Task |
|
||||||
|
|---|---|
|
||||||
|
| PDF extraction via pdftotext | Tasks 2, 6, 7 |
|
||||||
|
| .md and .txt passthrough (no regression) | Task 1 |
|
||||||
|
| Unsupported extension → clear error | Task 1 |
|
||||||
|
| Entry.Aliases loaded from frontmatter | Task 3 |
|
||||||
|
| Fuzzy normalization (case, articles, hyphens) | Task 4 |
|
||||||
|
| Alias matching | Task 4 |
|
||||||
|
| Title matching across different proposed slugs | Task 4 |
|
||||||
|
| Cross-page-type isolation (concept ≠ entity) | Task 4 |
|
||||||
|
| Resolve wired into pipeline.Run | Task 5 |
|
||||||
|
| extract.Text wired into watcher | Task 6 |
|
||||||
|
| extract.Text wired into handler (single + dir) | Task 6 |
|
||||||
|
| Dockerfile includes poppler-utils | Task 7 |
|
||||||
|
|
||||||
|
**Placeholder scan:** None found.
|
||||||
|
|
||||||
|
**Type consistency:**
|
||||||
|
- `Resolve([]wiki.Page, map[wiki.PageType][]wiki.Entry) []wiki.Page` — consistent across Tasks 4 and 5.
|
||||||
|
- `extract.Text(path string) (string, error)` — consistent across Tasks 1, 2, and 6.
|
||||||
|
- `Entry.Aliases []string` — added in Task 3, used by Resolve in Task 4 (reads `e.Aliases`).
|
||||||
|
- `readFrontmatter` replaces `readTitle` entirely in Task 3 — no lingering `readTitle` calls.
|
||||||
1073
docs/superpowers/plans/2026-04-22-phase4-attempt-wiring.md
Normal file
1073
docs/superpowers/plans/2026-04-22-phase4-attempt-wiring.md
Normal file
File diff suppressed because it is too large
Load Diff
1323
docs/superpowers/plans/2026-04-23-level3-slug-authority.md
Normal file
1323
docs/superpowers/plans/2026-04-23-level3-slug-authority.md
Normal file
File diff suppressed because it is too large
Load Diff
433
docs/superpowers/plans/2026-04-23-source-backrefs.md
Normal file
433
docs/superpowers/plans/2026-04-23-source-backrefs.md
Normal file
@@ -0,0 +1,433 @@
|
|||||||
|
# Source Back-References Implementation Plan
|
||||||
|
|
||||||
|
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||||
|
|
||||||
|
**Goal:** After the LLM produces wiki pages for an ingestion, automatically inject a `## Sources` back-reference on every concept and entity page that the source page links to.
|
||||||
|
|
||||||
|
**Architecture:** A new `injectSourceRefs` post-processing step is inserted between `Resolve` and `mergeAll` in `pipeline.Run`. It finds the source page in the proposed batch, extracts all `[[slug|...]]` wikilinks, then calls `wiki.Merge` with a minimal patch page to add the back-reference. `wiki.Merge` already treats `## Sources` as a bullet section with deduplication — no custom section parsing is needed. For concepts/entities that exist on disk but weren't proposed in the current batch (the common case on re-ingestion), the function loads them from disk and adds them to the pages list so they are updated.
|
||||||
|
|
||||||
|
**Tech Stack:** Go stdlib (`regexp`, `os`, `path/filepath`, `strings`), existing `wiki.Merge` and `wiki.Page` types.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## File Structure
|
||||||
|
|
||||||
|
**New files:**
|
||||||
|
- `ingestion/internal/pipeline/refs.go` — `injectSourceRefs`, `addSourceRef`, `extractWikilinks`, `findSourcePage`, `findInInventory`
|
||||||
|
- `ingestion/internal/pipeline/refs_test.go` — table-driven tests
|
||||||
|
|
||||||
|
**Modified files:**
|
||||||
|
- `ingestion/internal/pipeline/pipeline.go` — insert `injectSourceRefs` call between `Resolve` and `mergeAll`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 1: `refs.go` — source back-reference injection
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `ingestion/internal/pipeline/refs_test.go`
|
||||||
|
- Create: `ingestion/internal/pipeline/refs.go`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Write the failing tests**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// ingestion/internal/pipeline/refs_test.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
// makeInventory builds a minimal inventory for test use.
|
||||||
|
func makeInventory(concepts, entities []string) map[wiki.PageType][]wiki.Entry {
|
||||||
|
inv := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeConcept: {},
|
||||||
|
wiki.PageTypeEntity: {},
|
||||||
|
wiki.PageTypeSource: {},
|
||||||
|
}
|
||||||
|
for _, slug := range concepts {
|
||||||
|
inv[wiki.PageTypeConcept] = append(inv[wiki.PageTypeConcept], wiki.Entry{Slug: slug, Title: slug})
|
||||||
|
}
|
||||||
|
for _, slug := range entities {
|
||||||
|
inv[wiki.PageTypeEntity] = append(inv[wiki.PageTypeEntity], wiki.Entry{Slug: slug, Title: slug})
|
||||||
|
}
|
||||||
|
return inv
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_NoSourcePage(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Definition\n\nFoo.\n"},
|
||||||
|
}
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
assert.Equal(t, pages, got)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_InjectsIntoProposedConcept(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/my-article.md",
|
||||||
|
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[domain-driven-design|Domain Driven Design]].\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "wiki/concepts/domain-driven-design.md",
|
||||||
|
Content: "---\ntitle: Domain Driven Design\n---\n\n## Definition\n\nA methodology.\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
assert.Contains(t, got[1].Content, "## Sources")
|
||||||
|
assert.Contains(t, got[1].Content, "[[my-article|My Article]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_LoadsConceptFromDisk(t *testing.T) {
|
||||||
|
brainDir := t.TempDir()
|
||||||
|
conceptDir := filepath.Join(brainDir, "wiki", "concepts")
|
||||||
|
require.NoError(t, os.MkdirAll(conceptDir, 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(
|
||||||
|
filepath.Join(conceptDir, "shape-up.md"),
|
||||||
|
[]byte("---\ntitle: Shape Up\n---\n\n## Definition\n\nA methodology.\n"),
|
||||||
|
0o644,
|
||||||
|
))
|
||||||
|
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/my-article.md",
|
||||||
|
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[shape-up|Shape Up]].\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
inv := makeInventory([]string{"shape-up"}, nil)
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, inv, brainDir)
|
||||||
|
|
||||||
|
// Should have loaded shape-up.md from disk and added it with source ref.
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
var conceptPage wiki.Page
|
||||||
|
for _, p := range got {
|
||||||
|
if p.Path == "wiki/concepts/shape-up.md" {
|
||||||
|
conceptPage = p
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.Contains(t, conceptPage.Content, "## Sources")
|
||||||
|
assert.Contains(t, conceptPage.Content, "[[my-article|My Article]]")
|
||||||
|
// Original content preserved.
|
||||||
|
assert.Contains(t, conceptPage.Content, "## Definition")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_NoSelfReference(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/my-article.md",
|
||||||
|
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSelf-link [[my-article|My Article]].\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
|
||||||
|
// Only one page — source should not reference itself.
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_DeduplicatesOnReingestion(t *testing.T) {
|
||||||
|
// Concept already has source ref from a prior ingestion.
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/my-article.md",
|
||||||
|
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[ddd|DDD]].\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "wiki/concepts/ddd.md",
|
||||||
|
Content: "---\ntitle: DDD\n---\n\n## Definition\n\nA thing.\n\n## Sources\n\n- [[my-article|My Article]]\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
// The source ref must appear exactly once.
|
||||||
|
count := 0
|
||||||
|
for _, line := range splitLines(got[1].Content) {
|
||||||
|
if line == "- [[my-article|My Article]]" {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.Equal(t, 1, count, "source ref should appear exactly once")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_InjectsIntoEntity(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/book.md",
|
||||||
|
Content: "---\ntitle: Book\n---\n\n## Summary\n\nBy [[ryan-singer|Ryan Singer]].\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "wiki/entities/ryan-singer.md",
|
||||||
|
Content: "---\ntitle: Ryan Singer\n---\n\n## Description\n\nA designer.\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
var entity wiki.Page
|
||||||
|
for _, p := range got {
|
||||||
|
if p.Path == "wiki/entities/ryan-singer.md" {
|
||||||
|
entity = p
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.Contains(t, entity.Content, "[[book|Book]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExtractWikilinks(t *testing.T) {
|
||||||
|
content := "See [[foo|Foo]] and [[bar|Bar]] and [[foo|Foo again]]."
|
||||||
|
got := extractWikilinks(content)
|
||||||
|
assert.True(t, got["foo"])
|
||||||
|
assert.True(t, got["bar"])
|
||||||
|
assert.Len(t, got, 2, "duplicate slugs should be deduplicated")
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitLines is a test helper.
|
||||||
|
func splitLines(s string) []string {
|
||||||
|
var out []string
|
||||||
|
for _, l := range splitNewlines(s) {
|
||||||
|
if l != "" {
|
||||||
|
out = append(out, l)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitNewlines cuts s at every '\n' and returns the pieces without the
// newline characters. The text after the last newline is always included, so
// the result has one more element than s has newlines (an empty s yields a
// single empty string).
func splitNewlines(s string) []string {
	var parts []string
	begin := 0
	for pos := 0; pos < len(s); pos++ {
		if s[pos] != '\n' {
			continue
		}
		parts = append(parts, s[begin:pos])
		begin = pos + 1
	}
	return append(parts, s[begin:])
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Run to verify they fail**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs/ingestion && go test ./internal/pipeline/... -run "TestInjectSourceRefs|TestExtractWikilinks" -v
|
||||||
|
```
|
||||||
|
Expected: compile error — `injectSourceRefs` and `extractWikilinks` not defined.
|
||||||
|
|
||||||
|
- [ ] **Step 3: Implement refs.go**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// ingestion/internal/pipeline/refs.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
)
|
||||||
|
|
||||||
|
// wikilinkRE matches the opening of a piped wikilink, "[[slug|", and captures
// the slug (one or more characters other than '|' and ']') as group 1. Links
// written without a pipe, such as "[[Display Name]]", do not match.
var wikilinkRE = regexp.MustCompile(`\[\[([^|\]]+)\|`)
|
||||||
|
|
||||||
|
// injectSourceRefs finds the source page in the proposed batch, extracts its wikilinks,
|
||||||
|
// and injects a back-reference into every linked concept or entity page.
|
||||||
|
// Pages that exist on disk but are not in the current batch are loaded and appended
|
||||||
|
// so they will be updated on write.
|
||||||
|
func injectSourceRefs(pages []wiki.Page, inventory map[wiki.PageType][]wiki.Entry, brainDir string) []wiki.Page {
|
||||||
|
sourceSlug, sourceTitle, found := findSourcePage(pages)
|
||||||
|
if !found {
|
||||||
|
return pages
|
||||||
|
}
|
||||||
|
|
||||||
|
// Locate source page content for wikilink extraction.
|
||||||
|
var sourceContent string
|
||||||
|
for _, p := range pages {
|
||||||
|
if strings.HasPrefix(p.Path, "wiki/sources/") &&
|
||||||
|
strings.TrimSuffix(filepath.Base(p.Path), ".md") == sourceSlug {
|
||||||
|
sourceContent = p.Content
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
linkedSlugs := extractWikilinks(sourceContent)
|
||||||
|
sourceRef := "- [[" + sourceSlug + "|" + sourceTitle + "]]"
|
||||||
|
|
||||||
|
// Build slug → index map for proposed pages (excluding wiki/sources/).
|
||||||
|
bySlug := make(map[string]int, len(pages))
|
||||||
|
for i, p := range pages {
|
||||||
|
if !strings.HasPrefix(p.Path, "wiki/sources/") {
|
||||||
|
bySlug[strings.TrimSuffix(filepath.Base(p.Path), ".md")] = i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for slug := range linkedSlugs {
|
||||||
|
if slug == sourceSlug {
|
||||||
|
continue // no self-reference
|
||||||
|
}
|
||||||
|
|
||||||
|
if idx, ok := bySlug[slug]; ok {
|
||||||
|
// Concept/entity is in the proposed batch — inject inline.
|
||||||
|
pages[idx] = addSourceRef(pages[idx], sourceRef)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not in proposed batch — look for it in the inventory (exists on disk).
|
||||||
|
pt, ok := findInInventory(slug, inventory)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
diskPath := filepath.Join(brainDir, "wiki", string(pt), slug+".md")
|
||||||
|
b, err := os.ReadFile(diskPath)
|
||||||
|
if err != nil {
|
||||||
|
continue // page not found on disk; skip
|
||||||
|
}
|
||||||
|
page := wiki.Page{
|
||||||
|
Path: "wiki/" + string(pt) + "/" + slug + ".md",
|
||||||
|
Content: string(b),
|
||||||
|
}
|
||||||
|
pages = append(pages, addSourceRef(page, sourceRef))
|
||||||
|
}
|
||||||
|
|
||||||
|
return pages
|
||||||
|
}
|
||||||
|
|
||||||
|
// addSourceRef injects sourceRef into the ## Sources bullet section of page.
|
||||||
|
// Uses wiki.Merge so that existing Sources entries are deduplicated and all
|
||||||
|
// other sections are preserved unchanged.
|
||||||
|
func addSourceRef(page wiki.Page, sourceRef string) wiki.Page {
|
||||||
|
patch := wiki.Page{
|
||||||
|
Path: page.Path,
|
||||||
|
Content: "\n## Sources\n\n" + sourceRef + "\n",
|
||||||
|
}
|
||||||
|
return wiki.Merge(page, patch)
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractWikilinks returns the set of slugs referenced as [[slug|...]] in content.
|
||||||
|
func extractWikilinks(content string) map[string]bool {
|
||||||
|
slugs := make(map[string]bool)
|
||||||
|
for _, m := range wikilinkRE.FindAllStringSubmatch(content, -1) {
|
||||||
|
slugs[m[1]] = true
|
||||||
|
}
|
||||||
|
return slugs
|
||||||
|
}
|
||||||
|
|
||||||
|
// findSourcePage returns the slug and title of the first wiki/sources/ page in pages.
|
||||||
|
func findSourcePage(pages []wiki.Page) (slug, title string, found bool) {
|
||||||
|
for _, p := range pages {
|
||||||
|
if strings.HasPrefix(p.Path, "wiki/sources/") {
|
||||||
|
slug = strings.TrimSuffix(filepath.Base(p.Path), ".md")
|
||||||
|
title = extractTitle(p.Content)
|
||||||
|
if title == "" {
|
||||||
|
title = slug
|
||||||
|
}
|
||||||
|
return slug, title, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "", "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
// findInInventory returns the PageType for a slug if it appears in the inventory.
|
||||||
|
func findInInventory(slug string, inventory map[wiki.PageType][]wiki.Entry) (wiki.PageType, bool) {
|
||||||
|
for pt, entries := range inventory {
|
||||||
|
for _, e := range entries {
|
||||||
|
if e.Slug == slug {
|
||||||
|
return pt, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 4: Run all pipeline tests**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs/ingestion && go test ./internal/pipeline/... -v
|
||||||
|
```
|
||||||
|
Expected: all existing tests PASS + 7 new refs tests PASS.
|
||||||
|
|
||||||
|
- [ ] **Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs && git add ingestion/internal/pipeline/refs.go ingestion/internal/pipeline/refs_test.go && git commit -m "feat(pipeline): inject source back-references into concept and entity pages"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 2: Wire injectSourceRefs into pipeline.Run
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `ingestion/internal/pipeline/pipeline.go`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Insert the call**
|
||||||
|
|
||||||
|
In `pipeline.go`, locate:
|
||||||
|
|
||||||
|
```go
|
||||||
|
resolved := Resolve(allPages, inventory)
|
||||||
|
merged := mergeAll(resolved)
|
||||||
|
```
|
||||||
|
|
||||||
|
Replace with:
|
||||||
|
|
||||||
|
```go
|
||||||
|
resolved := Resolve(allPages, inventory)
|
||||||
|
withRefs := injectSourceRefs(resolved, inventory, brainDir)
|
||||||
|
merged := mergeAll(withRefs)
|
||||||
|
```
|
||||||
|
|
||||||
|
No import changes needed — same package.
|
||||||
|
|
||||||
|
- [ ] **Step 2: Run all pipeline tests**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs/ingestion && go test ./internal/pipeline/... -v
|
||||||
|
```
|
||||||
|
Expected: all tests PASS. The existing `TestRun_WritesPages` and `TestRun_DryRunDoesNotWrite` use LLM mocks that return source pages with no wikilinks to concepts — `injectSourceRefs` is a no-op for them.
|
||||||
|
|
||||||
|
- [ ] **Step 3: Run full test suite + lint**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs/ingestion && go test ./... && golangci-lint run ./...
|
||||||
|
```
|
||||||
|
Expected: all packages PASS, 0 lint issues.
|
||||||
|
|
||||||
|
- [ ] **Step 4: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs && git add ingestion/internal/pipeline/pipeline.go && git commit -m "feat(pipeline): wire source back-reference injection into Run"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Self-Review
|
||||||
|
|
||||||
|
**Spec coverage:**
|
||||||
|
|
||||||
|
| Requirement | Task |
|
||||||
|
|---|---|
|
||||||
|
| Concepts get `## Sources` back-link to ingested source | Task 1 |
|
||||||
|
| Entities get `## Sources` back-link | Task 1 (TestInjectSourceRefs_InjectsIntoEntity) |
|
||||||
|
| Existing pages on disk get updated with new source | Task 1 (TestInjectSourceRefs_LoadsConceptFromDisk) |
|
||||||
|
| Re-ingestion of same source does not duplicate the ref | Task 1 (TestInjectSourceRefs_DeduplicatesOnReingestion) |
|
||||||
|
| Source page does not reference itself | Task 1 (TestInjectSourceRefs_NoSelfReference) |
|
||||||
|
| No-op when batch has no source page | Task 1 (TestInjectSourceRefs_NoSourcePage) |
|
||||||
|
| Wired into Run between Resolve and mergeAll | Task 2 |
|
||||||
|
| Full test suite and lint pass | Task 2 Step 3 |
|
||||||
|
|
||||||
|
**Placeholder scan:** None.
|
||||||
|
|
||||||
|
**Type consistency:** `injectSourceRefs([]wiki.Page, map[wiki.PageType][]wiki.Entry, string) []wiki.Page` — used identically in refs.go (definition) and pipeline.go (call site).
|
||||||
@@ -0,0 +1,240 @@
|
|||||||
|
# Brain Ingestion Pipeline — Design Spec
|
||||||
|
|
||||||
|
**Date:** 2026-04-22
|
||||||
|
**Status:** approved
|
||||||
|
**Author:** Mathias + Claude
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Add a structured ingestion pipeline to the hyperguild brain. The pipeline accepts raw content (directly or from files) and uses an LLM to produce structured wiki pages in `brain/wiki/` — the declarative layer of the Two-Layer Brain. Three fixed knowledge classes: **concepts**, **entities**, **sources**.
|
||||||
|
|
||||||
|
This spec covers:
|
||||||
|
- Four new packages in the `ingestion` Go module (`llm`, `wiki`, `pipeline`, `watcher`)
|
||||||
|
- Two new HTTP endpoints on the ingestion server (`/ingest`, `/ingest-path`)
|
||||||
|
- A background file watcher for `brain/raw/`
|
||||||
|
- Config additions to both the ingestion server and the supervisor
|
||||||
|
|
||||||
|
It does **not** cover Layer 2 (training data, `brain/training-data/`) — that is the trainer worker's concern.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Information Model
|
||||||
|
|
||||||
|
Three fixed wiki page classes, matching the Two-Layer Brain design spec and the existing `ingestion-svc` model:
|
||||||
|
|
||||||
|
### `wiki/sources/<slug>.md`
|
||||||
|
One page per ingested source (project, book, article, note). Updated (not replaced) on re-ingestion.
|
||||||
|
|
||||||
|
Required frontmatter: `title`, `type` (article|pdf|book|video|note|project), `domain`, `source_url`, `date_ingested`, `last_updated`, `aliases`.
|
||||||
|
|
||||||
|
Body sections: Summary · Key Claims · Concepts Introduced or Reinforced · Entities Mentioned · Open Questions Raised. Books add: Chapters · Argument Arc · Updates (dated, append-only).
|
||||||
|
|
||||||
|
### `wiki/concepts/<slug>.md`
|
||||||
|
One page per idea, framework, methodology, or pattern (e.g. Domain Driven Design, TDD, event sourcing).
|
||||||
|
|
||||||
|
Required frontmatter: `title`, `domain`, `last_updated`, `aliases`.
|
||||||
|
|
||||||
|
Body sections: Definition · Why It Matters · Related Concepts · Related Entities · Sources · Evolving Notes.
|
||||||
|
|
||||||
|
### `wiki/entities/<slug>.md`
|
||||||
|
One page per person, tool, organisation, technology, or product.
|
||||||
|
|
||||||
|
Required frontmatter: `title`, `type` (person|company|tool|model|framework|technology), `domain`, `last_updated`, `aliases`.
|
||||||
|
|
||||||
|
Body sections: Description · Relevance · Key Positions/Products/Claims · Related Concepts · Related Entities · Sources.
|
||||||
|
|
||||||
|
### Wikilink format
|
||||||
|
All cross-references use `[[slug|Display Text]]`. Slug = lowercase title, spaces→hyphens, non-alphanumeric stripped. Slugs must resolve to an existing file in the wiki.
|
||||||
|
|
||||||
|
### Supporting files
|
||||||
|
- `brain/wiki/index.md` — auto-rebuilt on every ingest: one-sentence summary per page, grouped by type
|
||||||
|
- `brain/log.md` — append-only audit trail: date, source, pages written, warnings
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
### New packages (`ingestion` module)
|
||||||
|
|
||||||
|
```
|
||||||
|
ingestion/internal/
|
||||||
|
llm/ — OpenAI-compatible HTTP client (chat completions, retry on 429,
|
||||||
|
configurable timeout and temperature)
|
||||||
|
wiki/ — Page types, slug utilities, merge logic, inventory loader,
|
||||||
|
index rebuilder, log appender
|
||||||
|
pipeline/ — Orchestrates one ingest run end-to-end (content or extracted file text)
|
||||||
|
watcher/ — Polls brain/raw/ and triggers pipeline on new files
|
||||||
|
```
|
||||||
|
|
||||||
|
The existing `api/` and `search/` packages are updated; no other existing packages change.
|
||||||
|
|
||||||
|
### Brain directory layout
|
||||||
|
|
||||||
|
```
|
||||||
|
brain/
|
||||||
|
wiki/
|
||||||
|
concepts/ ← LLM-structured concept pages
|
||||||
|
entities/ ← LLM-structured entity pages
|
||||||
|
sources/ ← LLM-structured source pages
|
||||||
|
index.md ← auto-rebuilt on each ingest
|
||||||
|
knowledge/ ← quick raw notes via brain_write (BM25-searchable, unchanged)
|
||||||
|
raw/ ← drop zone; watcher picks up files here
|
||||||
|
processed/ ← moved here on success (organised by date: processed/YYYY-MM-DD/)
|
||||||
|
failed/ ← moved here on failure
|
||||||
|
sessions/ ← session logs (retrospective/trainer concern, not touched here)
|
||||||
|
training-data/ ← Layer 2 (trainer worker concern, not touched here)
|
||||||
|
log.md ← append-only audit trail
|
||||||
|
CLAUDE.md ← schema document injected into every ingest prompt
|
||||||
|
```
|
||||||
|
|
||||||
|
If `brain/CLAUDE.md` is absent, the pipeline falls back to an embedded default schema compiled into the binary.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## API
|
||||||
|
|
||||||
|
### `POST /ingest`
|
||||||
|
|
||||||
|
Ingest content provided directly by the caller.
|
||||||
|
|
||||||
|
**Request:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"content": "...",
|
||||||
|
"source": "shape-up-book",
|
||||||
|
"dry_run": false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Response:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"pages": ["wiki/sources/shape-up.md", "wiki/concepts/betting-table.md"],
|
||||||
|
"warnings": []
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
`source` is the human-readable name used when writing/updating `wiki/sources/<slug>.md`. `dry_run: true` returns the page contents without writing.
|
||||||
|
|
||||||
|
### `POST /ingest-path`
|
||||||
|
|
||||||
|
Ingest a file or walk a directory recursively. Supports `.md`, `.txt`, `.pdf`.
|
||||||
|
|
||||||
|
**Request:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"path": "/Users/mathias/brain/raw/shape-up.pdf",
|
||||||
|
"source": "shape-up-book",
|
||||||
|
"dry_run": false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
If `path` is a directory, all supported files within it are ingested in sequence. `source` is optional for directory ingestion — if omitted, the LLM derives it from each file's name and content.
|
||||||
|
|
||||||
|
**Response:** same shape as `/ingest`, with pages and warnings aggregated across all files.
|
||||||
|
|
||||||
|
### Supervisor skill update
|
||||||
|
|
||||||
|
`brain_ingest` in `internal/skills/brain/handlers.go` gains an optional `path` field. If `path` is set, it calls `/ingest-path`; otherwise `/ingest`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Pipeline
|
||||||
|
|
||||||
|
`pipeline.Run(ctx, cfg, brainDir, content, source, dryRun)` — called by both HTTP handlers after any file reading is done.
|
||||||
|
|
||||||
|
Steps:
|
||||||
|
|
||||||
|
1. **Load inventory** — walk `brain/wiki/{concepts,entities,sources}/`, build slug index grouped by type. Injected into prompt so LLM knows what to update vs create.
|
||||||
|
2. **Load schema** — read `brain/CLAUDE.md`; fall back to embedded default if absent.
|
||||||
|
3. **Chunk** — split content at `INGEST_CHUNK_SIZE` chars (default 6000; split on paragraph boundary). If `INGEST_CHUNK_SIZE=0`, no chunking.
|
||||||
|
4. **LLM call per chunk** — returns JSON array of `{"path": "wiki/concepts/foo.md", "content": "..."}`. Prompt structure: system instruction → date → schema → inventory → non-negotiable slug/wikilink rules → source content.
|
||||||
|
5. **Parse + truncation recovery** — strip markdown fences if present. If JSON array is truncated mid-object (token limit), salvage all complete objects before the break and log a warning.
|
||||||
|
6. **Merge** — combine pages with the same path across chunks:
|
||||||
|
- Bullet sections (Related Concepts, Related Entities, Sources, Key Claims): union unique lines
|
||||||
|
- Append sections (Evolving Notes, Updates, Open Questions): append new content
|
||||||
|
- All other sections: keep first occurrence
|
||||||
|
- Frontmatter: keep first occurrence
|
||||||
|
7. **Write** — create subdirs as needed, write files atomically. In dry-run mode, return page map without writing.
|
||||||
|
8. **Rebuild `index.md`** — one-sentence summary per page (derived from first body paragraph), grouped by type, with page count header.
|
||||||
|
9. **Append to `log.md`** — date, source, list of pages written, warning count.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## File Watcher
|
||||||
|
|
||||||
|
Background goroutine started at server startup (when `INGEST_WATCH_INTERVAL > 0`).
|
||||||
|
|
||||||
|
**Poll loop:**
|
||||||
|
1. Walk `brain/raw/` for files with supported extensions (`.md`, `.txt`, `.pdf`), excluding `processed/` and `failed/` subdirs.
|
||||||
|
2. For each file found: derive source from filename (strip extension, kebab-to-title), call `pipeline.Run` with the file content.
|
||||||
|
3. On success: move file to `brain/raw/processed/YYYY-MM-DD/<filename>`.
|
||||||
|
4. On failure: move file to `brain/raw/failed/<filename>`, append error to `brain/log.md`.
|
||||||
|
5. Sleep `INGEST_WATCH_INTERVAL` seconds, repeat.
|
||||||
|
|
||||||
|
Files are processed one at a time (no concurrency within the watcher) to avoid LLM rate-limit collisions.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## LLM Prompt
|
||||||
|
|
||||||
|
**System:**
|
||||||
|
> You are a wiki agent. Read the source material and produce structured wiki pages following the schema provided. Output ONLY a valid JSON array — no markdown fences, no other text. Each element must have: `"path"` (relative path within wiki, e.g. `"wiki/sources/foo.md"`) and `"content"` (full markdown including YAML frontmatter). Follow the schema strictly: correct frontmatter fields, wikilinks as `[[slug|Display Text]]`, dates in YYYY-MM-DD format, paraphrase rather than quoting verbatim.
|
||||||
|
|
||||||
|
**User (built dynamically):**
|
||||||
|
1. Today's date
|
||||||
|
2. Full schema (`brain/CLAUDE.md` content)
|
||||||
|
3. Existing wiki inventory grouped by type (for update-vs-create decisions)
|
||||||
|
4. Non-negotiable rules: slug format, wikilink format, one-source-per-book, section type enforcement
|
||||||
|
5. Source content (the chunk)
|
||||||
|
|
||||||
|
Temperature: 0.2 for reproducibility.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
### Ingestion server (new env vars)
|
||||||
|
|
||||||
|
| Variable | Default | Description |
|
||||||
|
|---|---|---|
|
||||||
|
| `INGEST_LLM_URL` | `http://iguana:4000/v1` | OpenAI-compatible endpoint |
|
||||||
|
| `INGEST_LLM_KEY` | (empty) | API key |
|
||||||
|
| `INGEST_LLM_MODEL` | `koala/qwen35-9b-fast` | Model name |
|
||||||
|
| `INGEST_LLM_TIMEOUT` | `15` | LLM call timeout (minutes) |
|
||||||
|
| `INGEST_CHUNK_SIZE` | `6000` | Max chars per LLM call (0 = no chunking) |
|
||||||
|
| `INGEST_WATCH_INTERVAL` | `30` | Watcher poll interval in seconds (0 = disabled) |
|
||||||
|
|
||||||
|
### Supervisor (new env vars + wiring)
|
||||||
|
|
||||||
|
| Variable | Default | Description |
|
||||||
|
|---|---|---|
|
||||||
|
| `INGEST_SVC_URL` | (empty) | URL of ingestion server for `brain_ingest` |
|
||||||
|
| `KB_RETRIEVAL_URL` | (empty) | URL of KB retrieval server for `brain_search` |
|
||||||
|
|
||||||
|
`config.go` gets two new fields. `main.go` passes them to `brain.New()`. Both tools are only registered as MCP tools when the respective URL is configured (already implemented in `skill.go`).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
| Package | What is tested |
|
||||||
|
|---|---|
|
||||||
|
| `wiki/` | Slug generation (edge cases: apostrophes, colons, version strings), merge logic (bullets union, append, keep-first), inventory loading from temp dir, truncation recovery (valid partial JSON), index rebuild output |
|
||||||
|
| `pipeline/` | Integration test: temp brain dir + mock LLM HTTP server returning fixture JSON; verify files written to correct paths, index rebuilt, log appended |
|
||||||
|
| `api/` | Handler tests for `/ingest` and `/ingest-path` using mock pipeline; 400 on missing fields, 200 with expected response shape |
|
||||||
|
| `watcher/` | File placed in `brain/raw/` is moved to `processed/` on mock-pipeline success; moved to `failed/` on error |
|
||||||
|
|
||||||
|
All tests are table-driven. No real LLM calls in tests.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- Python validation/correction loop (can be added later; the LLM prompt enforces schema rules as non-negotiable instructions)
|
||||||
|
- `brain/training-data/` — trainer worker concern
|
||||||
|
- `brain/sessions/` — retrospective/sessionlog concern
|
||||||
|
- Upload endpoint (multipart HTTP) — `scp`/rsync to `brain/raw/` + watcher covers this
|
||||||
|
- Qdrant vector indexing — `brain_search` calls a separate KB retrieval service; ingestion does not write to Qdrant
|
||||||
@@ -0,0 +1,148 @@
|
|||||||
|
# Level 3: Strip Slug Authority from LLM — Design Spec
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
|
||||||
|
The ingestion pipeline currently asks the LLM to produce full wiki pages including the file path (e.g. `wiki/sources/finbert-huggingface.md`). This causes two classes of bug:
|
||||||
|
|
||||||
|
1. **Slug proliferation** — the LLM invents different slugs for the same concept across chunks or runs, producing duplicate pages that diverge in content.
|
||||||
|
2. **Unstable paths** — the LLM may shorten, expand, or vary titles, making deduplication via `Resolve` unreliable because the slug mismatch is upstream of the normalizer.
|
||||||
|
|
||||||
|
## Solution
|
||||||
|
|
||||||
|
Strip slug authority from the LLM entirely. The LLM returns a minimal structured object. The pipeline computes all slugs deterministically from titles using `wiki.Slug(title)`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## LLM JSON Contract
|
||||||
|
|
||||||
|
### Output format (per page)
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"title": "FinBERT",
|
||||||
|
"type": "concept",
|
||||||
|
"subtype": "framework",
|
||||||
|
"domain": "ai-llm",
|
||||||
|
"content": "## Definition\n\nA BERT-based model fine-tuned for financial sentiment...\n\n## Related\n\n- [[Sentiment Analysis]]\n- [[Hugging Face]]\n"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Fields:**
|
||||||
|
|
||||||
|
| Field | Required | Values |
|
||||||
|
|-------|----------|--------|
|
||||||
|
| `title` | yes | Human-readable title, e.g. "FinBERT" |
|
||||||
|
| `type` | yes | `"source"` \| `"concept"` \| `"entity"` |
|
||||||
|
| `subtype` | for entity/source | entity: `person\|company\|tool\|model\|framework\|technology`; source: `article\|pdf\|book\|video\|note\|project` |
|
||||||
|
| `domain` | no | tag string, e.g. `ai-llm`, `finance` |
|
||||||
|
| `content` | yes | Markdown body sections only — no frontmatter, no path |
|
||||||
|
|
||||||
|
**Wikilinks in content:** `[[Display Name]]` only. No slug. The pipeline canonicalizes to `[[slug|Display Name]]` in a post-processing step.
|
||||||
|
|
||||||
|
**The LLM never writes slugs, paths, or frontmatter.**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Pipeline Changes
|
||||||
|
|
||||||
|
### New type: `RawPage`
|
||||||
|
|
||||||
|
```go
|
||||||
|
// RawPage is the minimal page object returned by the LLM. It carries no slug,
// path, or frontmatter; the pipeline derives those from these fields.
type RawPage struct {
	Title   string // human-readable title, e.g. "FinBERT"; required
	Type    string // "source" | "concept" | "entity"
	Subtype string // entity or source subtype; empty when not applicable
	Domain  string // optional tag, e.g. "ai-llm"
	Content string // markdown body sections only
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### New step order
|
||||||
|
|
||||||
|
```
|
||||||
|
ParseRawPages → BuildPages → Resolve → CanonicalizeLinks → injectSourceRefs → mergeAll → write
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step descriptions
|
||||||
|
|
||||||
|
**`ParseRawPages(output string) ([]RawPage, []string)`**
|
||||||
|
Replaces `ParsePages`. Deserializes JSON objects with the new schema. Same truncation-recovery logic as today. Returns `(pages, warnings)`.
|
||||||
|
|
||||||
|
**`BuildPages(rawPages []RawPage, sourceSlug, date string) []wiki.Page`**
|
||||||
|
Converts `RawPage → wiki.Page`:
|
||||||
|
- Computes slug: `wiki.Slug(page.Title)`
|
||||||
|
- Computes path: `wiki/<type>/<slug>.md`
|
||||||
|
- Assembles frontmatter:
|
||||||
|
```
|
||||||
|
---
|
||||||
|
title: <Title>
|
||||||
|
type: <type>
|
||||||
|
subtype: <subtype> # omitted if empty
|
||||||
|
domain: <domain> # omitted if empty
|
||||||
|
created: <date>
|
||||||
|
source: <sourceSlug> # omitted for the source page itself
|
||||||
|
---
|
||||||
|
```
|
||||||
|
- Concatenates frontmatter + content
|
||||||
|
|
||||||
|
**`Resolve(pages []wiki.Page, inventory) []wiki.Page`**
|
||||||
|
Unchanged. Normalizes near-duplicate titles to existing inventory slugs.
|
||||||
|
|
||||||
|
**`CanonicalizeLinks(pages []wiki.Page, inventory) ([]wiki.Page, []string)`**
|
||||||
|
New. Builds a title→slug map from inventory + current batch. Replaces `[[Display Name]]` with `[[slug|Display Name]]` in each page's content. Titles with no known slug are left as-is and returned as warnings.
|
||||||
|
|
||||||
|
**`injectSourceRefs`**
|
||||||
|
Unchanged. Reads `[[slug|...]]` links (post-canonicalization) to inject back-references.
|
||||||
|
|
||||||
|
**`mergeAll → write`**
|
||||||
|
Unchanged.
|
||||||
|
|
||||||
|
### `pipeline.Run` signature change
|
||||||
|
|
||||||
|
```go
|
||||||
|
func Run(ctx context.Context, cfg Config, brainDir, content, source string, dryRun bool) (Result, error)
|
||||||
|
```
|
||||||
|
|
||||||
|
`source` is already passed (it's the display name / filename). A new internal `sourceSlug` is derived from it via `wiki.Slug(source)` before calling `BuildPages`. No API change needed.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Files Changed
|
||||||
|
|
||||||
|
| File | Change |
|
||||||
|
|------|--------|
|
||||||
|
| `ingestion/internal/pipeline/parse.go` | Replace `ParsePages` with `ParseRawPages` + `RawPage` type |
|
||||||
|
| `ingestion/internal/pipeline/build.go` | New file: `BuildPages` |
|
||||||
|
| `ingestion/internal/pipeline/links.go` | New file: `CanonicalizeLinks` |
|
||||||
|
| `ingestion/internal/pipeline/pipeline.go` | Wire new steps; derive `sourceSlug` from `source` |
|
||||||
|
| `ingestion/internal/pipeline/prompt.go` | New system prompt + `BuildPrompt` for new JSON format |
|
||||||
|
| `brain/schema.md` | Update wikilink format and JSON schema docs |
|
||||||
|
|
||||||
|
`resolve.go`, `refs.go`, `backfill.go`, `merge.go` — no changes.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Wikilink Format
|
||||||
|
|
||||||
|
- **LLM output**: `[[Display Name]]`
|
||||||
|
- **Stored on disk**: `[[slug|Display Name]]`
|
||||||
|
- **`CanonicalizeLinks`** converts the LLM-output form into the stored form, using a title→slug map built from the inventory plus the current batch
|
||||||
|
|
||||||
|
This matches Obsidian's display-alias syntax that the existing codebase already uses.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Testing Strategy
|
||||||
|
|
||||||
|
- `ParseRawPages`: table-driven, cover valid JSON, truncated output, unknown type, missing title
|
||||||
|
- `BuildPages`: table-driven, cover slug computation, frontmatter assembly, source page (no `source:` field), entity with subtype
|
||||||
|
- `CanonicalizeLinks`: cover known title → replaced, unknown title → left as-is + warning, multiple links in one page
|
||||||
|
- Integration test: full `Run` call with mock LLM returning new JSON format, assert no slug duplication across two chunks of the same source
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- Re-ingesting existing pages (user will trigger manually after deploy)
|
||||||
|
- Changing the `BackfillRefs` endpoint (already correct, slug-based)
|
||||||
|
- Changing the `Resolve` fuzzy-match algorithm
|
||||||
36
ingestion/Dockerfile
Normal file
36
ingestion/Dockerfile
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# syntax=docker/dockerfile:1
|
||||||
|
|
||||||
|
FROM golang:1.26-bookworm AS builder
|
||||||
|
|
||||||
|
ARG VERSION=dev
|
||||||
|
WORKDIR /src
|
||||||
|
|
||||||
|
COPY go.mod go.sum ./
|
||||||
|
RUN go mod download
|
||||||
|
|
||||||
|
COPY . .
|
||||||
|
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
|
||||||
|
go build -trimpath -ldflags="-s -w" \
|
||||||
|
-o /out/ingestion ./cmd/server
|
||||||
|
|
||||||
|
FROM alpine:3.21
|
||||||
|
|
||||||
|
RUN apk add --no-cache poppler-utils
|
||||||
|
|
||||||
|
COPY --from=builder /out/ingestion /usr/local/bin/ingestion
|
||||||
|
|
||||||
|
RUN addgroup -S ingestion && adduser -S -G ingestion ingestion
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# brain/ is writable state — mount a PersistentVolume here
|
||||||
|
VOLUME /app/brain
|
||||||
|
|
||||||
|
ENV INGEST_BRAIN_DIR=/app/brain
|
||||||
|
ENV INGEST_PORT=3300
|
||||||
|
|
||||||
|
USER ingestion
|
||||||
|
|
||||||
|
EXPOSE 3300
|
||||||
|
|
||||||
|
ENTRYPOINT ["/usr/local/bin/ingestion"]
|
||||||
@@ -2,34 +2,87 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/api"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/api"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/llm"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/watcher"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func envOr(key, fallback string) string {
|
||||||
|
if v := os.Getenv(key); v != "" {
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
return fallback
|
||||||
|
}
|
||||||
|
|
||||||
|
func envInt(key string, fallback int) int {
|
||||||
|
if v := os.Getenv(key); v != "" {
|
||||||
|
if n, err := strconv.Atoi(v); err == nil {
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fallback
|
||||||
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
logger := slog.New(slog.NewJSONHandler(os.Stdout, nil))
|
logger := slog.New(slog.NewJSONHandler(os.Stdout, nil))
|
||||||
|
|
||||||
brainDir := os.Getenv("INGEST_BRAIN_DIR")
|
brainDir := envOr("INGEST_BRAIN_DIR", "../brain")
|
||||||
if brainDir == "" {
|
port := envOr("INGEST_PORT", "3300")
|
||||||
brainDir = "../brain"
|
|
||||||
|
llmURL := envOr("INGEST_LLM_URL", "http://iguana:4000/v1")
|
||||||
|
llmKey := os.Getenv("INGEST_LLM_KEY")
|
||||||
|
llmModel := envOr("INGEST_LLM_MODEL", "koala/qwen35-9b-fast")
|
||||||
|
llmTimeoutMins := envInt("INGEST_LLM_TIMEOUT", 15)
|
||||||
|
chunkSize := envInt("INGEST_CHUNK_SIZE", 6000)
|
||||||
|
watchInterval := envInt("INGEST_WATCH_INTERVAL", 30)
|
||||||
|
|
||||||
|
llmClient := llm.New(llmURL, llmKey, llmModel, time.Duration(llmTimeoutMins)*time.Minute)
|
||||||
|
|
||||||
|
pipelineCfg := pipeline.Config{
|
||||||
|
Complete: llmClient.Complete,
|
||||||
|
ChunkSize: chunkSize,
|
||||||
}
|
}
|
||||||
|
|
||||||
port := os.Getenv("INGEST_PORT")
|
h := api.NewHandler(brainDir, logger, pipelineCfg)
|
||||||
if port == "" {
|
|
||||||
port = "3300"
|
|
||||||
}
|
|
||||||
|
|
||||||
h := api.NewHandler(brainDir, logger)
|
ctx := context.Background()
|
||||||
|
if watchInterval > 0 {
|
||||||
|
watcher.Start(ctx, watcher.Config{
|
||||||
|
BrainDir: brainDir,
|
||||||
|
Interval: time.Duration(watchInterval) * time.Second,
|
||||||
|
Pipeline: pipelineCfg,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
mux.HandleFunc("/query", h.Query)
|
mux.HandleFunc("POST /query", h.Query)
|
||||||
mux.HandleFunc("/write", h.Write)
|
mux.HandleFunc("POST /write", h.Write)
|
||||||
|
mux.HandleFunc("POST /ingest", h.Ingest)
|
||||||
|
mux.HandleFunc("POST /ingest-path", h.IngestPath)
|
||||||
|
mux.HandleFunc("POST /backfill-refs", h.BackfillRefs)
|
||||||
|
|
||||||
addr := ":" + port
|
addr := ":" + port
|
||||||
logger.Info("ingestion server starting", "addr", addr, "brain_dir", brainDir)
|
watchIntervalLog := "disabled"
|
||||||
|
if watchInterval > 0 {
|
||||||
|
watchIntervalLog = fmt.Sprintf("%ds", watchInterval)
|
||||||
|
}
|
||||||
|
logger.Info("ingestion server starting",
|
||||||
|
"addr", addr,
|
||||||
|
"brain_dir", brainDir,
|
||||||
|
"llm_url", llmURL,
|
||||||
|
"llm_model", llmModel,
|
||||||
|
"chunk_size", chunkSize,
|
||||||
|
"watch_interval", watchIntervalLog,
|
||||||
|
)
|
||||||
if err := http.ListenAndServe(addr, mux); err != nil {
|
if err := http.ListenAndServe(addr, mux); err != nil {
|
||||||
logger.Error("server stopped", "err", err)
|
logger.Error("server stopped", "err", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
|
|||||||
@@ -11,6 +11,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/extract"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/search"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/search"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -18,11 +20,15 @@ import (
|
|||||||
type Handler struct {
|
type Handler struct {
|
||||||
brainDir string
|
brainDir string
|
||||||
logger *slog.Logger
|
logger *slog.Logger
|
||||||
|
pipeline pipeline.Config
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewHandler constructs a Handler. brainDir is the absolute path to brain/.
|
// NewHandler constructs a Handler. brainDir is the absolute path to brain/.
|
||||||
func NewHandler(brainDir string, logger *slog.Logger) *Handler {
|
func NewHandler(brainDir string, logger *slog.Logger, pipelineCfg pipeline.Config) *Handler {
|
||||||
return &Handler{brainDir: brainDir, logger: logger}
|
if logger == nil {
|
||||||
|
logger = slog.Default()
|
||||||
|
}
|
||||||
|
return &Handler{brainDir: brainDir, logger: logger, pipeline: pipelineCfg}
|
||||||
}
|
}
|
||||||
|
|
||||||
type queryRequest struct {
|
type queryRequest struct {
|
||||||
@@ -37,15 +43,32 @@ type writeRequest struct {
|
|||||||
Domain string `json:"domain,omitempty"`
|
Domain string `json:"domain,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ingestRequest struct {
|
||||||
|
Content string `json:"content"`
|
||||||
|
Source string `json:"source"`
|
||||||
|
DryRun bool `json:"dry_run"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ingestPathRequest struct {
|
||||||
|
Path string `json:"path"`
|
||||||
|
Source string `json:"source"`
|
||||||
|
DryRun bool `json:"dry_run"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ingestResponse struct {
|
||||||
|
Pages []string `json:"pages"`
|
||||||
|
Warnings []string `json:"warnings"`
|
||||||
|
}
|
||||||
|
|
||||||
// Query handles POST /query — full-text search across the brain wiki.
|
// Query handles POST /query — full-text search across the brain wiki.
|
||||||
func (h *Handler) Query(w http.ResponseWriter, r *http.Request) {
|
func (h *Handler) Query(w http.ResponseWriter, r *http.Request) {
|
||||||
var req queryRequest
|
var req queryRequest
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
http.Error(w, "invalid JSON", http.StatusBadRequest)
|
writeError(w, http.StatusBadRequest, "invalid JSON")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if strings.TrimSpace(req.Query) == "" {
|
if strings.TrimSpace(req.Query) == "" {
|
||||||
http.Error(w, "query is required", http.StatusBadRequest)
|
writeError(w, http.StatusBadRequest, "query is required")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if req.Limit == 0 {
|
if req.Limit == 0 {
|
||||||
@@ -55,22 +78,22 @@ func (h *Handler) Query(w http.ResponseWriter, r *http.Request) {
|
|||||||
results, err := search.Query(h.brainDir, req.Query, req.Limit)
|
results, err := search.Query(h.brainDir, req.Query, req.Limit)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
h.logger.Error("query failed", "err", err)
|
h.logger.Error("query failed", "err", err)
|
||||||
http.Error(w, "search error", http.StatusInternalServerError)
|
writeError(w, http.StatusInternalServerError, "search error")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
writeJSON(w, map[string]any{"results": results})
|
writeJSON(w, map[string]any{"results": results})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write handles POST /write — write raw content to brain/raw/.
|
// Write handles POST /write — write raw content to brain/knowledge/.
|
||||||
func (h *Handler) Write(w http.ResponseWriter, r *http.Request) {
|
func (h *Handler) Write(w http.ResponseWriter, r *http.Request) {
|
||||||
var req writeRequest
|
var req writeRequest
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
http.Error(w, "invalid JSON", http.StatusBadRequest)
|
writeError(w, http.StatusBadRequest, "invalid JSON")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if req.Content == "" {
|
if req.Content == "" {
|
||||||
http.Error(w, "content is required", http.StatusBadRequest)
|
writeError(w, http.StatusBadRequest, "content is required")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -79,9 +102,9 @@ func (h *Handler) Write(w http.ResponseWriter, r *http.Request) {
|
|||||||
filename = fmt.Sprintf("%s-auto.md", time.Now().UTC().Format("2006-01-02-150405"))
|
filename = fmt.Sprintf("%s-auto.md", time.Now().UTC().Format("2006-01-02-150405"))
|
||||||
}
|
}
|
||||||
|
|
||||||
rawDir := filepath.Join(h.brainDir, "raw")
|
rawDir := filepath.Join(h.brainDir, "knowledge")
|
||||||
if err := os.MkdirAll(rawDir, 0o755); err != nil {
|
if err := os.MkdirAll(rawDir, 0o755); err != nil {
|
||||||
http.Error(w, "failed to create raw dir", http.StatusInternalServerError)
|
writeError(w, http.StatusInternalServerError, "failed to create raw dir")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -99,10 +122,18 @@ func (h *Handler) Write(w http.ResponseWriter, r *http.Request) {
|
|||||||
finalContent = fm.String() + req.Content
|
finalContent = fm.String() + req.Content
|
||||||
}
|
}
|
||||||
|
|
||||||
dest := filepath.Join(rawDir, filepath.Base(filename))
|
base := filepath.Base(filename)
|
||||||
|
if !strings.HasSuffix(base, ".md") {
|
||||||
|
base += ".md"
|
||||||
|
}
|
||||||
|
dest := filepath.Join(rawDir, base)
|
||||||
|
if !strings.HasPrefix(filepath.Clean(dest)+string(os.PathSeparator), filepath.Clean(rawDir)+string(os.PathSeparator)) {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid filename")
|
||||||
|
return
|
||||||
|
}
|
||||||
if err := os.WriteFile(dest, []byte(finalContent), 0o644); err != nil {
|
if err := os.WriteFile(dest, []byte(finalContent), 0o644); err != nil {
|
||||||
h.logger.Error("write failed", "err", err)
|
h.logger.Error("write failed", "err", err)
|
||||||
http.Error(w, "write error", http.StatusInternalServerError)
|
writeError(w, http.StatusInternalServerError, "write error")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -110,7 +141,156 @@ func (h *Handler) Write(w http.ResponseWriter, r *http.Request) {
|
|||||||
writeJSON(w, map[string]string{"path": filepath.ToSlash(rel)})
|
writeJSON(w, map[string]string{"path": filepath.ToSlash(rel)})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ingest handles POST /ingest — run the pipeline on provided content.
|
||||||
|
func (h *Handler) Ingest(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req ingestRequest
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid JSON")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(req.Content) == "" {
|
||||||
|
writeError(w, http.StatusBadRequest, "content is required")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(req.Source) == "" {
|
||||||
|
writeError(w, http.StatusBadRequest, "source is required")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := pipeline.Run(r.Context(), h.pipeline, h.brainDir, req.Content, req.Source, req.DryRun)
|
||||||
|
if err != nil {
|
||||||
|
h.logger.Error("ingest failed", "source", req.Source, "err", err)
|
||||||
|
writeError(w, http.StatusInternalServerError, "ingest error")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
pages := result.Pages
|
||||||
|
if pages == nil {
|
||||||
|
pages = []string{}
|
||||||
|
}
|
||||||
|
warnings := result.Warnings
|
||||||
|
if warnings == nil {
|
||||||
|
warnings = []string{}
|
||||||
|
}
|
||||||
|
writeJSON(w, ingestResponse{Pages: pages, Warnings: warnings})
|
||||||
|
}
|
||||||
|
|
||||||
|
// supportedExtensions lists file extensions that IngestPath will process.
|
||||||
|
var supportedExtensions = map[string]bool{
|
||||||
|
".md": true,
|
||||||
|
".txt": true,
|
||||||
|
".pdf": true,
|
||||||
|
}
|
||||||
|
|
||||||
|
// IngestPath handles POST /ingest-path — ingest a file or directory.
|
||||||
|
func (h *Handler) IngestPath(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req ingestPathRequest
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid JSON")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(req.Path) == "" {
|
||||||
|
writeError(w, http.StatusBadRequest, "path is required")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
info, err := os.Stat(req.Path)
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, fmt.Sprintf("path not accessible: %v", err))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var allPages []string
|
||||||
|
var allWarnings []string
|
||||||
|
|
||||||
|
if info.IsDir() {
|
||||||
|
err = filepath.WalkDir(req.Path, func(path string, d os.DirEntry, walkErr error) error {
|
||||||
|
if walkErr != nil {
|
||||||
|
return walkErr
|
||||||
|
}
|
||||||
|
if d.IsDir() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
ext := strings.ToLower(filepath.Ext(path))
|
||||||
|
if !supportedExtensions[ext] {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
content, readErr := extract.Text(path)
|
||||||
|
if readErr != nil {
|
||||||
|
allWarnings = append(allWarnings, fmt.Sprintf("extract %s: %v", path, readErr))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
source := req.Source
|
||||||
|
if source == "" {
|
||||||
|
source = filepath.Base(path)
|
||||||
|
}
|
||||||
|
result, runErr := pipeline.Run(r.Context(), h.pipeline, h.brainDir, content, source, req.DryRun)
|
||||||
|
if runErr != nil {
|
||||||
|
allWarnings = append(allWarnings, fmt.Sprintf("ingest %s: %v", path, runErr))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
allPages = append(allPages, result.Pages...)
|
||||||
|
allWarnings = append(allWarnings, result.Warnings...)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
h.logger.Error("walk dir failed", "path", req.Path, "err", err)
|
||||||
|
writeError(w, http.StatusInternalServerError, fmt.Sprintf("walk error: %v", err))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ext := strings.ToLower(filepath.Ext(req.Path))
|
||||||
|
if !supportedExtensions[ext] {
|
||||||
|
writeError(w, http.StatusBadRequest, fmt.Sprintf("unsupported file extension: %s", ext))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
content, readErr := extract.Text(req.Path)
|
||||||
|
if readErr != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, fmt.Sprintf("extract text: %v", readErr))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
source := req.Source
|
||||||
|
if source == "" {
|
||||||
|
source = filepath.Base(req.Path)
|
||||||
|
}
|
||||||
|
result, runErr := pipeline.Run(r.Context(), h.pipeline, h.brainDir, content, source, req.DryRun)
|
||||||
|
if runErr != nil {
|
||||||
|
h.logger.Error("ingest-path failed", "path", req.Path, "err", runErr)
|
||||||
|
writeError(w, http.StatusInternalServerError, "ingest error")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
allPages = result.Pages
|
||||||
|
allWarnings = result.Warnings
|
||||||
|
}
|
||||||
|
|
||||||
|
if allPages == nil {
|
||||||
|
allPages = []string{}
|
||||||
|
}
|
||||||
|
if allWarnings == nil {
|
||||||
|
allWarnings = []string{}
|
||||||
|
}
|
||||||
|
writeJSON(w, ingestResponse{Pages: allPages, Warnings: allWarnings})
|
||||||
|
}
|
||||||
|
|
||||||
|
// BackfillRefs handles POST /backfill-refs — injects source back-references
|
||||||
|
// into all concept and entity pages based on existing wiki/sources/ pages.
|
||||||
|
func (h *Handler) BackfillRefs(w http.ResponseWriter, r *http.Request) {
|
||||||
|
n, err := pipeline.BackfillRefs(r.Context(), h.brainDir)
|
||||||
|
if err != nil {
|
||||||
|
h.logger.Error("backfill-refs failed", "err", err)
|
||||||
|
writeError(w, http.StatusInternalServerError, "backfill error")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, map[string]int{"updated": n})
|
||||||
|
}
|
||||||
|
|
||||||
func writeJSON(w http.ResponseWriter, v any) {
|
func writeJSON(w http.ResponseWriter, v any) {
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
json.NewEncoder(w).Encode(v) //nolint:errcheck
|
json.NewEncoder(w).Encode(v) //nolint:errcheck
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func writeError(w http.ResponseWriter, code int, msg string) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(code)
|
||||||
|
json.NewEncoder(w).Encode(map[string]string{"error": msg}) //nolint:errcheck
|
||||||
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package api_test
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net/http"
|
"net/http"
|
||||||
@@ -12,25 +13,43 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/api"
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/api"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// stubComplete returns a fixed JSON array containing a single RawPage so tests never call a real LLM.
|
||||||
|
func stubComplete(_ context.Context, _, _ string) (string, error) {
|
||||||
|
return `[{"title":"Test Source","type":"source","subtype":"article","content":"## Summary\n\nSome content here.\n"}]`, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func stubPipelineCfg() pipeline.Config {
|
||||||
|
return pipeline.Config{
|
||||||
|
Complete: stubComplete,
|
||||||
|
ChunkSize: 0,
|
||||||
|
Schema: "# Test Schema\nwiki/sources/, wiki/concepts/, wiki/entities/",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func setup(t *testing.T) (string, *api.Handler) {
|
func setup(t *testing.T) (string, *api.Handler) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "knowledge"), 0o755))
|
||||||
require.NoError(t, os.MkdirAll(filepath.Join(dir, "raw"), 0o755))
|
|
||||||
require.NoError(t, os.WriteFile(
|
require.NoError(t, os.WriteFile(
|
||||||
filepath.Join(dir, "wiki", "concepts", "tdd.md"),
|
filepath.Join(dir, "knowledge", "tdd.md"),
|
||||||
[]byte("---\ntitle: TDD\ndomain: software\n---\n\nTest-driven development is a discipline.\n"),
|
[]byte("---\ntitle: TDD\ndomain: software\n---\n\nTest-driven development is a discipline.\n"),
|
||||||
0o644,
|
0o644,
|
||||||
))
|
))
|
||||||
logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
|
logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
|
||||||
return dir, api.NewHandler(dir, logger)
|
return dir, api.NewHandler(dir, logger, stubPipelineCfg())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Existing tests (Write / Query)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
func TestQuery_ReturnsResults(t *testing.T) {
|
func TestQuery_ReturnsResults(t *testing.T) {
|
||||||
_, h := setup(t)
|
_, h := setup(t)
|
||||||
body, _ := json.Marshal(map[string]any{"query": "test driven", "limit": 5})
|
body, _ := json.Marshal(map[string]any{"query": "test driven", "limit": 5})
|
||||||
@@ -46,7 +65,7 @@ func TestQuery_ReturnsResults(t *testing.T) {
|
|||||||
assert.NotEmpty(t, results)
|
assert.NotEmpty(t, results)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestWrite_CreatesRawFile(t *testing.T) {
|
func TestWrite_CreatesKnowledgeFile(t *testing.T) {
|
||||||
dir, h := setup(t)
|
dir, h := setup(t)
|
||||||
body, _ := json.Marshal(map[string]any{
|
body, _ := json.Marshal(map[string]any{
|
||||||
"content": "# Test note\n\nSome content.",
|
"content": "# Test note\n\nSome content.",
|
||||||
@@ -62,8 +81,7 @@ func TestWrite_CreatesRawFile(t *testing.T) {
|
|||||||
require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp))
|
require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp))
|
||||||
assert.NotEmpty(t, resp["path"])
|
assert.NotEmpty(t, resp["path"])
|
||||||
|
|
||||||
written := filepath.Join(dir, "raw", "test-note.md")
|
content, err := os.ReadFile(filepath.Join(dir, "knowledge", "test-note.md"))
|
||||||
content, err := os.ReadFile(written)
|
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.Contains(t, string(content), "Some content.")
|
assert.Contains(t, string(content), "Some content.")
|
||||||
}
|
}
|
||||||
@@ -93,7 +111,7 @@ func TestWrite_IncludesFrontmatterWhenTypeProvided(t *testing.T) {
|
|||||||
h.Write(rec, req)
|
h.Write(rec, req)
|
||||||
|
|
||||||
assert.Equal(t, http.StatusOK, rec.Code)
|
assert.Equal(t, http.StatusOK, rec.Code)
|
||||||
content, err := os.ReadFile(filepath.Join(dir, "raw", "typed-note.md"))
|
content, err := os.ReadFile(filepath.Join(dir, "knowledge", "typed-note.md"))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.Contains(t, string(content), "type: concept")
|
assert.Contains(t, string(content), "type: concept")
|
||||||
assert.Contains(t, string(content), "domain: software")
|
assert.Contains(t, string(content), "domain: software")
|
||||||
@@ -109,7 +127,127 @@ func TestWrite_GeneratesFilenameIfAbsent(t *testing.T) {
|
|||||||
h.Write(rec, req)
|
h.Write(rec, req)
|
||||||
|
|
||||||
assert.Equal(t, http.StatusOK, rec.Code)
|
assert.Equal(t, http.StatusOK, rec.Code)
|
||||||
entries, _ := os.ReadDir(filepath.Join(dir, "raw"))
|
entries, _ := os.ReadDir(filepath.Join(dir, "knowledge"))
|
||||||
assert.Len(t, entries, 1)
|
// +1 because setup already wrote tdd.md
|
||||||
assert.True(t, strings.HasSuffix(entries[0].Name(), ".md"))
|
assert.Len(t, entries, 2)
|
||||||
|
assert.True(t, strings.HasSuffix(entries[1].Name(), ".md"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// POST /ingest
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
func TestIngest_Validation(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
body map[string]any
|
||||||
|
}{
|
||||||
|
{"missing content", map[string]any{"source": "test-source"}},
|
||||||
|
{"missing source", map[string]any{"content": "some content"}},
|
||||||
|
{"whitespace content", map[string]any{"content": " ", "source": "test-source"}},
|
||||||
|
{"whitespace source", map[string]any{"content": "some content", "source": " "}},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
_, h := setup(t)
|
||||||
|
body, _ := json.Marshal(tc.body)
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/ingest", bytes.NewReader(body))
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
|
||||||
|
h.Ingest(rec, req)
|
||||||
|
|
||||||
|
assert.Equal(t, http.StatusBadRequest, rec.Code)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIngest_Success(t *testing.T) {
|
||||||
|
_, h := setup(t)
|
||||||
|
body, _ := json.Marshal(map[string]any{
|
||||||
|
"content": "some content about shape-up methodology",
|
||||||
|
"source": "shape-up-book",
|
||||||
|
"dry_run": true,
|
||||||
|
})
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/ingest", bytes.NewReader(body))
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
|
||||||
|
h.Ingest(rec, req)
|
||||||
|
|
||||||
|
require.Equal(t, http.StatusOK, rec.Code)
|
||||||
|
var resp map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp))
|
||||||
|
pages, ok := resp["pages"]
|
||||||
|
require.True(t, ok, "response must have pages field")
|
||||||
|
pagesSlice, ok := pages.([]any)
|
||||||
|
require.True(t, ok, "pages must be an array")
|
||||||
|
assert.NotEmpty(t, pagesSlice)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// POST /ingest-path
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
func TestIngestPath_MissingPath(t *testing.T) {
|
||||||
|
_, h := setup(t)
|
||||||
|
body, _ := json.Marshal(map[string]any{"source": "test-source"})
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/ingest-path", bytes.NewReader(body))
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
|
||||||
|
h.IngestPath(rec, req)
|
||||||
|
|
||||||
|
assert.Equal(t, http.StatusBadRequest, rec.Code)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIngestPath_File(t *testing.T) {
|
||||||
|
_, h := setup(t)
|
||||||
|
|
||||||
|
// Create a temp file with content
|
||||||
|
dir := t.TempDir()
|
||||||
|
f := filepath.Join(dir, "doc.md")
|
||||||
|
require.NoError(t, os.WriteFile(f, []byte("# Hello\nThis is markdown content."), 0o644))
|
||||||
|
|
||||||
|
body, _ := json.Marshal(map[string]any{
|
||||||
|
"path": f,
|
||||||
|
"source": "test-doc",
|
||||||
|
"dry_run": true,
|
||||||
|
})
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/ingest-path", bytes.NewReader(body))
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
|
||||||
|
h.IngestPath(rec, req)
|
||||||
|
|
||||||
|
require.Equal(t, http.StatusOK, rec.Code)
|
||||||
|
var resp map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp))
|
||||||
|
pages, ok := resp["pages"]
|
||||||
|
require.True(t, ok, "response must have pages field")
|
||||||
|
pagesSlice, ok := pages.([]any)
|
||||||
|
require.True(t, ok, "pages must be an array")
|
||||||
|
assert.NotEmpty(t, pagesSlice)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIngestPath_Directory(t *testing.T) {
|
||||||
|
_, h := setup(t)
|
||||||
|
|
||||||
|
// Create a temp dir with one .md file
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, os.WriteFile(filepath.Join(dir, "notes.md"), []byte("# Notes\nSome notes."), 0o644))
|
||||||
|
|
||||||
|
body, _ := json.Marshal(map[string]any{
|
||||||
|
"path": dir,
|
||||||
|
"dry_run": true,
|
||||||
|
})
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/ingest-path", bytes.NewReader(body))
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
|
||||||
|
h.IngestPath(rec, req)
|
||||||
|
|
||||||
|
require.Equal(t, http.StatusOK, rec.Code)
|
||||||
|
var resp map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp))
|
||||||
|
pages, ok := resp["pages"]
|
||||||
|
require.True(t, ok, "response must have pages field")
|
||||||
|
pagesSlice, ok := pages.([]any)
|
||||||
|
require.True(t, ok, "pages must be an array")
|
||||||
|
assert.NotEmpty(t, pagesSlice)
|
||||||
}
|
}
|
||||||
|
|||||||
39
ingestion/internal/extract/extract.go
Normal file
39
ingestion/internal/extract/extract.go
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
// ingestion/internal/extract/extract.go
|
||||||
|
package extract
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Text reads the file at path and returns its plain-text content.
|
||||||
|
// Supported extensions: .md, .txt (passthrough), .pdf (via pdftotext).
|
||||||
|
func Text(path string) (string, error) {
|
||||||
|
ext := strings.ToLower(fileExt(path))
|
||||||
|
switch ext {
|
||||||
|
case ".md", ".txt":
|
||||||
|
b, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("read %s: %w", path, err)
|
||||||
|
}
|
||||||
|
return string(b), nil
|
||||||
|
case ".pdf":
|
||||||
|
return extractPDF(path)
|
||||||
|
default:
|
||||||
|
return "", fmt.Errorf("unsupported file extension: %s", ext)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
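// fileExt returns the file extension including the dot, with its original case preserved (Text lowercases the result). It returns "" when the final path element contains no dot.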
|
||||||
|
func fileExt(path string) string {
|
||||||
|
for i := len(path) - 1; i >= 0; i-- {
|
||||||
|
if path[i] == '.' {
|
||||||
|
return path[i:]
|
||||||
|
}
|
||||||
|
if path[i] == '/' || path[i] == '\\' {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
62
ingestion/internal/extract/extract_test.go
Normal file
62
ingestion/internal/extract/extract_test.go
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
// ingestion/internal/extract/extract_test.go
|
||||||
|
package extract
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestText_Markdown(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "note.md")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("# Hello\n\nWorld."), 0o644))
|
||||||
|
|
||||||
|
got, err := Text(path)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "# Hello\n\nWorld.", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestText_Txt(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "note.txt")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("plain text"), 0o644))
|
||||||
|
|
||||||
|
got, err := Text(path)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "plain text", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestText_UnsupportedExtension(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "data.csv")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("a,b,c"), 0o644))
|
||||||
|
|
||||||
|
_, err := Text(path)
|
||||||
|
assert.ErrorContains(t, err, "unsupported")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestText_PDF(t *testing.T) {
|
||||||
|
if _, err := exec.LookPath("pdftotext"); err != nil {
|
||||||
|
t.Skip("pdftotext not available")
|
||||||
|
}
|
||||||
|
dir := t.TempDir()
|
||||||
|
pdfPath := filepath.Join(dir, "test.pdf")
|
||||||
|
|
||||||
|
// Minimal valid PDF containing the text "Hello PDF".
|
||||||
|
minimalPDF := "%PDF-1.4\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj\n" +
|
||||||
|
"2 0 obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj\n" +
|
||||||
|
"3 0 obj<</Type/Page/MediaBox[0 0 612 792]/Parent 2 0 R/Contents 4 0 R/Resources<</Font<</F1<</Type/Font/Subtype/Type1/BaseFont/Helvetica>>>>>>>>endobj\n" +
|
||||||
|
"4 0 obj<</Length 44>>\nstream\nBT /F1 12 Tf 100 700 Td (Hello PDF) Tj ET\nendstream\nendobj\n" +
|
||||||
|
"xref\n0 5\n0000000000 65535 f\n0000000009 00000 n\n0000000058 00000 n\n0000000115 00000 n\n0000000310 00000 n\n" +
|
||||||
|
"trailer<</Size 5/Root 1 0 R>>\nstartxref\n406\n%%EOF\n"
|
||||||
|
require.NoError(t, os.WriteFile(pdfPath, []byte(minimalPDF), 0o644))
|
||||||
|
|
||||||
|
got, err := Text(pdfPath)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, got, "Hello PDF")
|
||||||
|
}
|
||||||
28
ingestion/internal/extract/pdf.go
Normal file
28
ingestion/internal/extract/pdf.go
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
// ingestion/internal/extract/pdf.go
|
||||||
|
package extract
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"os/exec"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// extractPDF runs pdftotext on path and returns the extracted text with
// leading and trailing whitespace removed.
//
// pdftotext must be installed (package: poppler-utils on Alpine/Debian, poppler on Homebrew).
//
// On failure the returned error starts with "pdftotext: ", includes whatever
// the tool wrote to stderr, and wraps the underlying exec error, so callers
// can detect a missing binary with errors.Is(err, exec.ErrNotFound).
func extractPDF(path string) (string, error) {
	// A relative path that starts with '-' would be parsed as an option by
	// pdftotext; anchoring it to the current directory keeps it a file name.
	if strings.HasPrefix(path, "-") {
		path = "./" + path
	}

	// The trailing "-" is the output-file argument.
	cmd := exec.Command("pdftotext", "-q", path, "-")
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		if msg := strings.TrimSpace(stderr.String()); msg != "" {
			return "", fmt.Errorf("pdftotext: %s: %w", msg, err)
		}
		return "", fmt.Errorf("pdftotext: %w", err)
	}

	return strings.TrimSpace(stdout.String()), nil
}
|
||||||
119
ingestion/internal/llm/client.go
Normal file
119
ingestion/internal/llm/client.go
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
package llm
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Client is a small client for an OpenAI-compatible chat completions
// endpoint.
type Client struct {
	// baseURL is the API root; request paths are appended to it.
	baseURL string
	// apiKey is the credential for the API; it may be empty.
	apiKey string
	// model is the model identifier placed in each request.
	model string
	// httpClient performs the HTTP calls.
	httpClient *http.Client
}
|
||||||
|
|
||||||
|
// New constructs a Client.
|
||||||
|
func New(baseURL, apiKey, model string, timeout time.Duration) *Client {
|
||||||
|
return &Client{
|
||||||
|
baseURL: strings.TrimRight(baseURL, "/"),
|
||||||
|
apiKey: apiKey,
|
||||||
|
model: model,
|
||||||
|
httpClient: &http.Client{Timeout: timeout},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// chatRequest is the JSON body of a chat completions request.
type chatRequest struct {
	Model       string    `json:"model"`
	Messages    []message `json:"messages"`
	Temperature float64   `json:"temperature"`
}

// message is a single chat message: who said it (Role) and what was said
// (Content).
type message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

// chatResponse captures the only part of a chat completions response that is
// decoded here: the list of choices, each carrying one message.
type chatResponse struct {
	Choices []struct {
		Message message `json:"message"`
	} `json:"choices"`
}
|
||||||
|
|
||||||
|
// Complete sends a system + user message and returns the assistant's reply.
|
||||||
|
// Retries once on HTTP 429 using Retry-After header or 5s backoff.
|
||||||
|
func (c *Client) Complete(ctx context.Context, system, user string) (string, error) {
|
||||||
|
body := chatRequest{
|
||||||
|
Model: c.model,
|
||||||
|
Messages: []message{
|
||||||
|
{Role: "system", Content: system},
|
||||||
|
{Role: "user", Content: user},
|
||||||
|
},
|
||||||
|
Temperature: 0.2,
|
||||||
|
}
|
||||||
|
b, err := json.Marshal(body)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("marshal request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
do := func() (*http.Response, error) {
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/chat/completions", bytes.NewReader(b))
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("build request: %w", err)
|
||||||
|
}
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
if c.apiKey != "" {
|
||||||
|
req.Header.Set("Authorization", "Bearer "+c.apiKey)
|
||||||
|
}
|
||||||
|
return c.httpClient.Do(req)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := do()
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("call LLM: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode == http.StatusTooManyRequests {
|
||||||
|
_ = resp.Body.Close()
|
||||||
|
wait := 5 * time.Second
|
||||||
|
if ra := resp.Header.Get("Retry-After"); ra != "" {
|
||||||
|
if secs, err := strconv.Atoi(ra); err == nil {
|
||||||
|
wait = time.Duration(secs) * time.Second
|
||||||
|
}
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return "", ctx.Err()
|
||||||
|
case <-time.After(wait):
|
||||||
|
}
|
||||||
|
resp, err = do()
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("retry LLM call: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
defer resp.Body.Close() //nolint:errcheck
|
||||||
|
|
||||||
|
out, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("read response: %w", err)
|
||||||
|
}
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return "", fmt.Errorf("LLM returned %d: %s", resp.StatusCode, out)
|
||||||
|
}
|
||||||
|
|
||||||
|
var cr chatResponse
|
||||||
|
if err := json.Unmarshal(out, &cr); err != nil {
|
||||||
|
return "", fmt.Errorf("parse response: %w", err)
|
||||||
|
}
|
||||||
|
if len(cr.Choices) == 0 {
|
||||||
|
return "", fmt.Errorf("LLM returned no choices")
|
||||||
|
}
|
||||||
|
return cr.Choices[0].Message.Content, nil
|
||||||
|
}
|
||||||
86
ingestion/internal/llm/client_test.go
Normal file
86
ingestion/internal/llm/client_test.go
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
package llm
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func mockServer(t *testing.T, response string) *httptest.Server {
|
||||||
|
t.Helper()
|
||||||
|
return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
assert.Equal(t, "/chat/completions", r.URL.Path)
|
||||||
|
assert.Equal(t, "application/json", r.Header.Get("Content-Type"))
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"choices": []map[string]any{
|
||||||
|
{"message": map[string]any{"role": "assistant", "content": response}},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestClient_Complete(t *testing.T) {
|
||||||
|
srv := mockServer(t, "hello world")
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
c := New(srv.URL, "", "test-model", 10*time.Second)
|
||||||
|
got, err := c.Complete(context.Background(), "you are helpful", "say hello")
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "hello world", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestClient_ReturnsErrorOnNon200(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
http.Error(w, "overloaded", http.StatusServiceUnavailable)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
c := New(srv.URL, "", "test-model", 10*time.Second)
|
||||||
|
_, err := c.Complete(context.Background(), "sys", "user")
|
||||||
|
assert.Error(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestClient_SendsAuthHeader(t *testing.T) {
|
||||||
|
var gotAuth string
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
gotAuth = r.Header.Get("Authorization")
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"choices": []map[string]any{{"message": map[string]any{"content": "ok"}}},
|
||||||
|
})
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
c := New(srv.URL, "my-key", "test-model", 10*time.Second)
|
||||||
|
_, err := c.Complete(context.Background(), "sys", "user")
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "Bearer my-key", gotAuth)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestClient_Retries429(t *testing.T) {
|
||||||
|
calls := 0
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
calls++
|
||||||
|
if calls == 1 {
|
||||||
|
w.Header().Set("Retry-After", "0")
|
||||||
|
w.WriteHeader(http.StatusTooManyRequests)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"choices": []map[string]any{{"message": map[string]any{"content": "retried"}}},
|
||||||
|
})
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
c := New(srv.URL, "", "test-model", 10*time.Second)
|
||||||
|
got, err := c.Complete(context.Background(), "sys", "user")
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "retried", got)
|
||||||
|
assert.Equal(t, 2, calls)
|
||||||
|
}
|
||||||
91
ingestion/internal/pipeline/backfill.go
Normal file
91
ingestion/internal/pipeline/backfill.go
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
// ingestion/internal/pipeline/backfill.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
// BackfillRefs walks wiki/sources/ and injects source back-references into every
|
||||||
|
// concept and entity page that each source links to.
|
||||||
|
// Changes for all sources are accumulated in memory before writing, so multiple
|
||||||
|
// sources referencing the same concept are merged in one pass.
|
||||||
|
// Deduplication is handled by wiki.Merge — running this multiple times is safe.
|
||||||
|
// Returns the number of concept/entity pages written.
|
||||||
|
func BackfillRefs(ctx context.Context, brainDir string) (int, error) {
|
||||||
|
inventory, err := wiki.LoadInventory(brainDir)
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("load inventory: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
sourcesDir := filepath.Join(brainDir, "wiki", "sources")
|
||||||
|
entries, err := os.ReadDir(sourcesDir)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
return 0, fmt.Errorf("read sources dir: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accumulate all changes before writing: relPath → updated Page.
|
||||||
|
// Collecting first means two sources that both link the same concept
|
||||||
|
// get both refs merged before a single write.
|
||||||
|
pending := make(map[string]wiki.Page)
|
||||||
|
|
||||||
|
for _, e := range entries {
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
return 0, ctx.Err()
|
||||||
|
}
|
||||||
|
if e.IsDir() || !strings.HasSuffix(e.Name(), ".md") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
b, err := os.ReadFile(filepath.Join(sourcesDir, e.Name()))
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
sourceContent := string(b)
|
||||||
|
sourceSlug := strings.TrimSuffix(e.Name(), ".md")
|
||||||
|
sourceTitle := extractTitle(sourceContent)
|
||||||
|
if sourceTitle == "" {
|
||||||
|
sourceTitle = sourceSlug
|
||||||
|
}
|
||||||
|
sourceRef := "- [[" + sourceSlug + "|" + sourceTitle + "]]"
|
||||||
|
|
||||||
|
for slug := range extractWikilinks(sourceContent) {
|
||||||
|
if slug == sourceSlug {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
pt, ok := findInInventory(slug, inventory)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
relPath := "wiki/" + string(pt) + "/" + slug + ".md"
|
||||||
|
|
||||||
|
// Start from already-accumulated version if we've seen this page.
|
||||||
|
page, seen := pending[relPath]
|
||||||
|
if !seen {
|
||||||
|
raw, err := os.ReadFile(filepath.Join(brainDir, filepath.FromSlash(relPath)))
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
page = wiki.Page{Path: relPath, Content: string(raw)}
|
||||||
|
}
|
||||||
|
pending[relPath] = addSourceRef(page, sourceRef)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for relPath, page := range pending {
|
||||||
|
dest := filepath.Join(brainDir, filepath.FromSlash(relPath))
|
||||||
|
if err := os.WriteFile(dest, []byte(page.Content), 0o644); err != nil {
|
||||||
|
return 0, fmt.Errorf("write %s: %w", relPath, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return len(pending), nil
|
||||||
|
}
|
||||||
107
ingestion/internal/pipeline/backfill_test.go
Normal file
107
ingestion/internal/pipeline/backfill_test.go
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
// ingestion/internal/pipeline/backfill_test.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func setupBrainDir(t *testing.T) string {
|
||||||
|
t.Helper()
|
||||||
|
dir := t.TempDir()
|
||||||
|
for _, sub := range []string{"wiki/sources", "wiki/concepts", "wiki/entities"} {
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, sub), 0o755))
|
||||||
|
}
|
||||||
|
return dir
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeFile(t *testing.T, path, content string) {
|
||||||
|
t.Helper()
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte(content), 0o644))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBackfillRefs_UpdatesConcept(t *testing.T) {
|
||||||
|
dir := setupBrainDir(t)
|
||||||
|
writeFile(t, filepath.Join(dir, "wiki/sources/shape-up.md"),
|
||||||
|
"---\ntitle: Shape Up\n---\n\n## Summary\n\nSee [[betting|Betting]].\n")
|
||||||
|
writeFile(t, filepath.Join(dir, "wiki/concepts/betting.md"),
|
||||||
|
"---\ntitle: Betting\n---\n\n## Definition\n\nA resource allocation technique.\n")
|
||||||
|
|
||||||
|
n, err := BackfillRefs(context.Background(), dir)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 1, n)
|
||||||
|
|
||||||
|
got, err := os.ReadFile(filepath.Join(dir, "wiki/concepts/betting.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, string(got), "## Sources")
|
||||||
|
assert.Contains(t, string(got), "[[shape-up|Shape Up]]")
|
||||||
|
assert.Contains(t, string(got), "## Definition") // original content preserved
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBackfillRefs_Deduplication(t *testing.T) {
|
||||||
|
dir := setupBrainDir(t)
|
||||||
|
writeFile(t, filepath.Join(dir, "wiki/sources/shape-up.md"),
|
||||||
|
"---\ntitle: Shape Up\n---\n\n## Summary\n\nSee [[betting|Betting]].\n")
|
||||||
|
writeFile(t, filepath.Join(dir, "wiki/concepts/betting.md"),
|
||||||
|
"---\ntitle: Betting\n---\n\n## Definition\n\nA technique.\n")
|
||||||
|
|
||||||
|
// Run twice — should not duplicate the ref.
|
||||||
|
_, err := BackfillRefs(context.Background(), dir)
|
||||||
|
require.NoError(t, err)
|
||||||
|
_, err = BackfillRefs(context.Background(), dir)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
got, err := os.ReadFile(filepath.Join(dir, "wiki/concepts/betting.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
count := 0
|
||||||
|
for _, line := range splitLines(string(got)) {
|
||||||
|
if line == "- [[shape-up|Shape Up]]" {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.Equal(t, 1, count, "ref should appear exactly once after two runs")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBackfillRefs_MultipleSources(t *testing.T) {
|
||||||
|
dir := setupBrainDir(t)
|
||||||
|
writeFile(t, filepath.Join(dir, "wiki/sources/book-a.md"),
|
||||||
|
"---\ntitle: Book A\n---\n\n## Summary\n\nSee [[shaping|Shaping]].\n")
|
||||||
|
writeFile(t, filepath.Join(dir, "wiki/sources/book-b.md"),
|
||||||
|
"---\ntitle: Book B\n---\n\n## Summary\n\nAlso [[shaping|Shaping]].\n")
|
||||||
|
writeFile(t, filepath.Join(dir, "wiki/concepts/shaping.md"),
|
||||||
|
"---\ntitle: Shaping\n---\n\n## Definition\n\nA design activity.\n")
|
||||||
|
|
||||||
|
n, err := BackfillRefs(context.Background(), dir)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 1, n) // one concept page written
|
||||||
|
|
||||||
|
got, err := os.ReadFile(filepath.Join(dir, "wiki/concepts/shaping.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, string(got), "[[book-a|Book A]]")
|
||||||
|
assert.Contains(t, string(got), "[[book-b|Book B]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBackfillRefs_NoSourcesDir(t *testing.T) {
|
||||||
|
dir := t.TempDir() // no wiki/sources subdir
|
||||||
|
n, err := BackfillRefs(context.Background(), dir)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 0, n)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBackfillRefs_SkipsUnknownSlugs(t *testing.T) {
|
||||||
|
dir := setupBrainDir(t)
|
||||||
|
// Source links to a slug not in inventory and not on disk.
|
||||||
|
writeFile(t, filepath.Join(dir, "wiki/sources/article.md"),
|
||||||
|
"---\ntitle: Article\n---\n\n## Summary\n\nSee [[ghost-slug|Ghost]].\n")
|
||||||
|
|
||||||
|
n, err := BackfillRefs(context.Background(), dir)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 0, n)
|
||||||
|
}
|
||||||
106
ingestion/internal/pipeline/build.go
Normal file
106
ingestion/internal/pipeline/build.go
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
// ingestion/internal/pipeline/build.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
// BuildPages converts RawPages from the LLM into wiki.Pages with computed slugs,
|
||||||
|
// paths, and YAML frontmatter. sourceSlug is the slug of the source being ingested
|
||||||
|
// (derived from the filename, not the LLM title). Pages whose title resolves to an
|
||||||
|
// empty slug are skipped and returned as warnings instead.
|
||||||
|
func BuildPages(rawPages []RawPage, sourceSlug, date string) ([]wiki.Page, []string) {
|
||||||
|
out := make([]wiki.Page, 0, len(rawPages))
|
||||||
|
var warnings []string
|
||||||
|
for _, rp := range rawPages {
|
||||||
|
slug := computeSlug(rp, sourceSlug)
|
||||||
|
if slug == "" {
|
||||||
|
warnings = append(warnings, fmt.Sprintf("skipped page with empty title (type: %s)", rp.Type))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out = append(out, buildPage(rp, sourceSlug, date))
|
||||||
|
}
|
||||||
|
return out, warnings
|
||||||
|
}
|
||||||
|
|
||||||
|
func computeSlug(rp RawPage, sourceSlug string) string {
|
||||||
|
if rp.Type == "source" {
|
||||||
|
return sourceSlug
|
||||||
|
}
|
||||||
|
return wiki.Slug(rp.Title)
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildPage(rp RawPage, sourceSlug, date string) wiki.Page {
|
||||||
|
var slug, dir string
|
||||||
|
switch rp.Type {
|
||||||
|
case "source":
|
||||||
|
slug = sourceSlug
|
||||||
|
dir = "wiki/sources"
|
||||||
|
case "concept":
|
||||||
|
slug = wiki.Slug(rp.Title)
|
||||||
|
dir = "wiki/concepts"
|
||||||
|
case "entity":
|
||||||
|
slug = wiki.Slug(rp.Title)
|
||||||
|
dir = "wiki/entities"
|
||||||
|
default:
|
||||||
|
slug = wiki.Slug(rp.Title)
|
||||||
|
dir = "wiki/" + rp.Type
|
||||||
|
}
|
||||||
|
|
||||||
|
path := dir + "/" + slug + ".md"
|
||||||
|
fm := buildFrontmatter(rp, date)
|
||||||
|
|
||||||
|
return wiki.Page{
|
||||||
|
Path: path,
|
||||||
|
Content: fm + "\n" + rp.Content,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildFrontmatter(rp RawPage, date string) string {
|
||||||
|
var sb strings.Builder
|
||||||
|
sb.WriteString("---\n")
|
||||||
|
fmt.Fprintf(&sb, "title: %s\n", yamlScalar(rp.Title))
|
||||||
|
|
||||||
|
switch rp.Type {
|
||||||
|
case "source":
|
||||||
|
subtype := rp.Subtype
|
||||||
|
if subtype == "" {
|
||||||
|
subtype = "article"
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&sb, "type: %s\n", yamlScalar(subtype))
|
||||||
|
if rp.Domain != "" {
|
||||||
|
fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&sb, "date_ingested: %s\n", date)
|
||||||
|
fmt.Fprintf(&sb, "last_updated: %s\n", date)
|
||||||
|
case "concept":
|
||||||
|
if rp.Domain != "" {
|
||||||
|
fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&sb, "last_updated: %s\n", date)
|
||||||
|
case "entity":
|
||||||
|
if rp.Subtype != "" {
|
||||||
|
fmt.Fprintf(&sb, "type: %s\n", yamlScalar(rp.Subtype))
|
||||||
|
}
|
||||||
|
if rp.Domain != "" {
|
||||||
|
fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&sb, "last_updated: %s\n", date)
|
||||||
|
default:
|
||||||
|
if rp.Domain != "" {
|
||||||
|
fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&sb, "last_updated: %s\n", date)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(&sb, "aliases:\n - %s\n", yamlScalar(rp.Title))
|
||||||
|
sb.WriteString("---\n")
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// yamlScalar renders s as a quoted YAML scalar that is safe to place after
// "key: " on a single line.
//
// Ordinary text is single-quoted, with embedded single quotes doubled.
// Single-quoted YAML has no escape sequences, so a string containing a
// newline or any other control character would spill over several lines and
// corrupt the frontmatter; such strings are emitted as a double-quoted scalar
// with backslash escapes instead.
func yamlScalar(s string) string {
	isControl := func(r rune) bool { return r < 0x20 || r == 0x7f }
	if strings.IndexFunc(s, isControl) >= 0 {
		// %q produces escapes (\n, \t, \", \\, \xNN, \uNNNN, ...) that are
		// also valid inside a YAML double-quoted scalar.
		return fmt.Sprintf("%q", s)
	}
	return "'" + strings.ReplaceAll(s, "'", "''") + "'"
}
|
||||||
167
ingestion/internal/pipeline/build_test.go
Normal file
167
ingestion/internal/pipeline/build_test.go
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
// ingestion/internal/pipeline/build_test.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestBuildPages_SourcePage(t *testing.T) {
|
||||||
|
raw := []RawPage{
|
||||||
|
{
|
||||||
|
Title: "Shape Up",
|
||||||
|
Type: "source",
|
||||||
|
Subtype: "book",
|
||||||
|
Domain: "product-strategy",
|
||||||
|
Content: "## Summary\n\nA book about shaping product work.\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
pages, warnings := BuildPages(raw, "shape-up", "2026-04-23")
|
||||||
|
require.Len(t, pages, 1)
|
||||||
|
assert.Empty(t, warnings)
|
||||||
|
|
||||||
|
p := pages[0]
|
||||||
|
assert.Equal(t, "wiki/sources/shape-up.md", p.Path)
|
||||||
|
assert.Contains(t, p.Content, "title: 'Shape Up'")
|
||||||
|
assert.Contains(t, p.Content, "type: 'book'")
|
||||||
|
assert.Contains(t, p.Content, "domain: 'product-strategy'")
|
||||||
|
assert.Contains(t, p.Content, "date_ingested: 2026-04-23")
|
||||||
|
assert.Contains(t, p.Content, "last_updated: 2026-04-23")
|
||||||
|
assert.Contains(t, p.Content, "aliases:\n - 'Shape Up'")
|
||||||
|
assert.Contains(t, p.Content, "## Summary")
|
||||||
|
assert.True(t, strings.HasPrefix(p.Content, "---\n"), "content must start with frontmatter")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildPages_ConceptPage(t *testing.T) {
|
||||||
|
raw := []RawPage{
|
||||||
|
{
|
||||||
|
Title: "Betting",
|
||||||
|
Type: "concept",
|
||||||
|
Domain: "product-strategy",
|
||||||
|
Content: "## Definition\n\nA resource allocation technique.\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
pages, warnings := BuildPages(raw, "shape-up", "2026-04-23")
|
||||||
|
require.Len(t, pages, 1)
|
||||||
|
assert.Empty(t, warnings)
|
||||||
|
|
||||||
|
p := pages[0]
|
||||||
|
assert.Equal(t, "wiki/concepts/betting.md", p.Path)
|
||||||
|
assert.Contains(t, p.Content, "title: 'Betting'")
|
||||||
|
assert.Contains(t, p.Content, "domain: 'product-strategy'")
|
||||||
|
assert.Contains(t, p.Content, "last_updated: 2026-04-23")
|
||||||
|
assert.Contains(t, p.Content, "aliases:\n - 'Betting'")
|
||||||
|
assert.NotContains(t, p.Content, "date_ingested")
|
||||||
|
assert.Contains(t, p.Content, "## Definition")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildPages_EntityPage(t *testing.T) {
|
||||||
|
raw := []RawPage{
|
||||||
|
{
|
||||||
|
Title: "Ryan Singer",
|
||||||
|
Type: "entity",
|
||||||
|
Subtype: "person",
|
||||||
|
Domain: "product-strategy",
|
||||||
|
Content: "## Description\n\nA product designer.\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
pages, warnings := BuildPages(raw, "shape-up", "2026-04-23")
|
||||||
|
require.Len(t, pages, 1)
|
||||||
|
assert.Empty(t, warnings)
|
||||||
|
|
||||||
|
p := pages[0]
|
||||||
|
assert.Equal(t, "wiki/entities/ryan-singer.md", p.Path)
|
||||||
|
assert.Contains(t, p.Content, "title: 'Ryan Singer'")
|
||||||
|
assert.Contains(t, p.Content, "type: 'person'")
|
||||||
|
assert.Contains(t, p.Content, "domain: 'product-strategy'")
|
||||||
|
assert.Contains(t, p.Content, "last_updated: 2026-04-23")
|
||||||
|
assert.Contains(t, p.Content, "aliases:\n - 'Ryan Singer'")
|
||||||
|
assert.NotContains(t, p.Content, "date_ingested")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildPages_SourceSlugUsedForSourcePage(t *testing.T) {
|
||||||
|
// LLM title differs from filename — pipeline uses sourceSlug for the source page path.
|
||||||
|
raw := []RawPage{
|
||||||
|
{Title: "FinBERT: A Pretrained Model", Type: "source", Subtype: "article", Content: "## Summary\n\nA model.\n"},
|
||||||
|
}
|
||||||
|
pages, _ := BuildPages(raw, "finbert-huggingface", "2026-04-23")
|
||||||
|
require.Len(t, pages, 1)
|
||||||
|
assert.Equal(t, "wiki/sources/finbert-huggingface.md", pages[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildPages_ConceptSlugDerivedFromTitle(t *testing.T) {
|
||||||
|
raw := []RawPage{
|
||||||
|
{Title: "Domain-Driven Design", Type: "concept", Content: "## Definition\n\nFoo.\n"},
|
||||||
|
}
|
||||||
|
pages, _ := BuildPages(raw, "some-source", "2026-04-23")
|
||||||
|
require.Len(t, pages, 1)
|
||||||
|
assert.Equal(t, "wiki/concepts/domain-driven-design.md", pages[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildPages_SourceDefaultSubtype(t *testing.T) {
|
||||||
|
// If subtype is omitted for a source, default to "article"
|
||||||
|
raw := []RawPage{
|
||||||
|
{Title: "Some Post", Type: "source", Content: "## Summary\n\nA post.\n"},
|
||||||
|
}
|
||||||
|
pages, _ := BuildPages(raw, "some-post", "2026-04-23")
|
||||||
|
require.Len(t, pages, 1)
|
||||||
|
assert.Contains(t, pages[0].Content, "type: 'article'")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildPages_OmitsDomainWhenEmpty(t *testing.T) {
|
||||||
|
raw := []RawPage{
|
||||||
|
{Title: "Betting", Type: "concept", Content: "## Definition\n\nFoo.\n"},
|
||||||
|
}
|
||||||
|
pages, _ := BuildPages(raw, "src", "2026-04-23")
|
||||||
|
require.Len(t, pages, 1)
|
||||||
|
assert.NotContains(t, pages[0].Content, "domain:")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildPages_MultiplePages(t *testing.T) {
|
||||||
|
raw := []RawPage{
|
||||||
|
{Title: "Shape Up", Type: "source", Subtype: "book", Content: "## Summary\n\nA book.\n"},
|
||||||
|
{Title: "Betting", Type: "concept", Content: "## Definition\n\nA technique.\n"},
|
||||||
|
{Title: "Ryan Singer", Type: "entity", Subtype: "person", Content: "## Description\n\nA designer.\n"},
|
||||||
|
}
|
||||||
|
pages, _ := BuildPages(raw, "shape-up", "2026-04-23")
|
||||||
|
require.Len(t, pages, 3)
|
||||||
|
assert.Equal(t, "wiki/sources/shape-up.md", pages[0].Path)
|
||||||
|
assert.Equal(t, "wiki/concepts/betting.md", pages[1].Path)
|
||||||
|
assert.Equal(t, "wiki/entities/ryan-singer.md", pages[2].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildPages_TitleWithColon(t *testing.T) {
|
||||||
|
raw := []RawPage{
|
||||||
|
{Title: "Shape Up: The Basecamp Method", Type: "source", Subtype: "book", Content: "## Summary\n\nA book.\n"},
|
||||||
|
}
|
||||||
|
pages, _ := BuildPages(raw, "shape-up", "2026-04-23")
|
||||||
|
require.Len(t, pages, 1)
|
||||||
|
// Title with colon must be quoted in YAML
|
||||||
|
assert.Contains(t, pages[0].Content, "title: 'Shape Up: The Basecamp Method'")
|
||||||
|
assert.Contains(t, pages[0].Content, "aliases:\n - 'Shape Up: The Basecamp Method'")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildPages_EntityNoSubtype(t *testing.T) {
|
||||||
|
raw := []RawPage{
|
||||||
|
{Title: "Basecamp", Type: "entity", Content: "## Description\n\nA company.\n"},
|
||||||
|
}
|
||||||
|
pages, _ := BuildPages(raw, "src", "2026-04-23")
|
||||||
|
require.Len(t, pages, 1)
|
||||||
|
assert.NotContains(t, pages[0].Content, "type:")
|
||||||
|
assert.Contains(t, pages[0].Content, "title: 'Basecamp'")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildPages_EmptyTitleSkippedWithWarning(t *testing.T) {
|
||||||
|
raw := []RawPage{
|
||||||
|
{Title: "", Type: "concept", Content: "## Definition\n\nFoo.\n"},
|
||||||
|
{Title: "Betting", Type: "concept", Content: "## Definition\n\nA technique.\n"},
|
||||||
|
}
|
||||||
|
pages, warnings := BuildPages(raw, "src", "2026-04-23")
|
||||||
|
require.Len(t, pages, 1, "empty-title page should be skipped")
|
||||||
|
assert.Equal(t, "wiki/concepts/betting.md", pages[0].Path)
|
||||||
|
assert.Len(t, warnings, 1)
|
||||||
|
assert.Contains(t, warnings[0], "empty title")
|
||||||
|
}
|
||||||
39
ingestion/internal/pipeline/chunk.go
Normal file
39
ingestion/internal/pipeline/chunk.go
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
// ingestion/internal/pipeline/chunk.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import "strings"
|
||||||
|
|
||||||
|
// Chunk trims content and splits it into chunks at paragraph boundaries
// (\n\n), packing as many whole paragraphs into each chunk as fit within
// maxSize bytes. Paragraphs are trimmed, empty ones are dropped, and the
// paragraphs inside a chunk are rejoined with "\n\n".
//
// A single paragraph longer than maxSize is never split, so a chunk can
// exceed maxSize in that case. If maxSize <= 0 or the trimmed content already
// fits, the trimmed content is returned as the only chunk.
func Chunk(content string, maxSize int) []string {
	trimmed := strings.TrimSpace(content)
	if maxSize <= 0 || len(trimmed) <= maxSize {
		return []string{trimmed}
	}

	const sep = "\n\n"
	var (
		out []string
		buf strings.Builder
	)
	for _, p := range strings.Split(trimmed, sep) {
		p = strings.TrimSpace(p)
		switch {
		case p == "":
			// Blank paragraph: nothing to add.
		case buf.Len() == 0:
			// First paragraph of a chunk goes in without a separator.
			buf.WriteString(p)
		case buf.Len()+len(sep)+len(p) > maxSize:
			// Does not fit: close the current chunk and start a new one.
			out = append(out, buf.String())
			buf.Reset()
			buf.WriteString(p)
		default:
			buf.WriteString(sep)
			buf.WriteString(p)
		}
	}
	if buf.Len() > 0 {
		out = append(out, buf.String())
	}
	return out
}
|
||||||
36
ingestion/internal/pipeline/chunk_test.go
Normal file
36
ingestion/internal/pipeline/chunk_test.go
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
// ingestion/internal/pipeline/chunk_test.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestChunk_NoChunkingWhenZero(t *testing.T) {
|
||||||
|
content := strings.Repeat("word ", 1000)
|
||||||
|
chunks := Chunk(content, 0)
|
||||||
|
assert.Len(t, chunks, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChunk_SplitsAtParagraph(t *testing.T) {
|
||||||
|
content := "First paragraph here.\n\nSecond paragraph here."
|
||||||
|
chunks := Chunk(content, 40)
|
||||||
|
assert.Len(t, chunks, 2)
|
||||||
|
assert.Equal(t, "First paragraph here.", chunks[0])
|
||||||
|
assert.Equal(t, "Second paragraph here.", chunks[1])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChunk_SingleLargeParagraph(t *testing.T) {
|
||||||
|
content := strings.Repeat("x", 100)
|
||||||
|
chunks := Chunk(content, 50)
|
||||||
|
assert.Len(t, chunks, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChunk_NoChunkingWhenContentFits(t *testing.T) {
|
||||||
|
content := "Short content."
|
||||||
|
chunks := Chunk(content, 1000)
|
||||||
|
assert.Len(t, chunks, 1)
|
||||||
|
assert.Equal(t, "Short content.", chunks[0])
|
||||||
|
}
|
||||||
70
ingestion/internal/pipeline/links.go
Normal file
70
ingestion/internal/pipeline/links.go
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
// ingestion/internal/pipeline/links.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
// plainLinkRE recognizes pipe-less wikilinks such as [[Display Name]] and
// captures the text between the brackets. The character class excludes both
// "]" and "|", so links already written as [[slug|Display]] never match.
var plainLinkRE = regexp.MustCompile(`\[\[([^\]|]+)\]\]`)
|
||||||
|
|
||||||
|
// CanonicalizeLinks converts [[Display Name]] wikilinks to [[slug|Display Name]]
|
||||||
|
// using a title→slug map built from the inventory and current batch.
|
||||||
|
// Unknown titles are left as-is and returned as warnings.
|
||||||
|
func CanonicalizeLinks(pages []wiki.Page, inventory map[wiki.PageType][]wiki.Entry) ([]wiki.Page, []string) {
|
||||||
|
titleToSlug := buildTitleMap(pages, inventory)
|
||||||
|
|
||||||
|
var allWarnings []string
|
||||||
|
out := make([]wiki.Page, len(pages))
|
||||||
|
for i, p := range pages {
|
||||||
|
newContent, warnings := canonicalizeContent(p.Content, titleToSlug)
|
||||||
|
p.Content = newContent
|
||||||
|
out[i] = p
|
||||||
|
allWarnings = append(allWarnings, warnings...)
|
||||||
|
}
|
||||||
|
return out, allWarnings
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildTitleMap builds a lowercase-title → slug map from inventory and current batch.
|
||||||
|
// Current batch entries take precedence over inventory (they may be updates).
|
||||||
|
func buildTitleMap(pages []wiki.Page, inventory map[wiki.PageType][]wiki.Entry) map[string]string {
|
||||||
|
m := make(map[string]string)
|
||||||
|
for _, entries := range inventory {
|
||||||
|
for _, e := range entries {
|
||||||
|
m[strings.ToLower(e.Title)] = e.Slug
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Current batch overrides inventory
|
||||||
|
for _, p := range pages {
|
||||||
|
title := extractTitle(p.Content)
|
||||||
|
slug := strings.TrimSuffix(filepath.Base(p.Path), ".md")
|
||||||
|
if title != "" && slug != "" {
|
||||||
|
m[strings.ToLower(title)] = slug
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
|
func canonicalizeContent(content string, titleToSlug map[string]string) (string, []string) {
|
||||||
|
var warnings []string
|
||||||
|
result := plainLinkRE.ReplaceAllStringFunc(content, func(match string) string {
|
||||||
|
sub := plainLinkRE.FindStringSubmatch(match)
|
||||||
|
if len(sub) < 2 {
|
||||||
|
return match
|
||||||
|
}
|
||||||
|
displayName := sub[1]
|
||||||
|
slug, ok := titleToSlug[strings.ToLower(displayName)]
|
||||||
|
if !ok {
|
||||||
|
warnings = append(warnings, fmt.Sprintf("unknown wikilink: [[%s]]", displayName))
|
||||||
|
return match
|
||||||
|
}
|
||||||
|
return "[[" + slug + "|" + displayName + "]]"
|
||||||
|
})
|
||||||
|
return result, warnings
|
||||||
|
}
|
||||||
125
ingestion/internal/pipeline/links_test.go
Normal file
125
ingestion/internal/pipeline/links_test.go
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
// ingestion/internal/pipeline/links_test.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCanonicalizeLinks_KnownTitle(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/shape-up.md",
|
||||||
|
Content: "---\ntitle: 'Shape Up'\n---\n\n## Summary\n\nSee [[Betting]].\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeConcept: {
|
||||||
|
{Slug: "betting", Title: "Betting"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got, warnings := CanonicalizeLinks(pages, inventory)
|
||||||
|
require.Len(t, got, 1)
|
||||||
|
assert.Empty(t, warnings)
|
||||||
|
assert.Contains(t, got[0].Content, "[[betting|Betting]]")
|
||||||
|
assert.NotContains(t, got[0].Content, "[[Betting]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCanonicalizeLinks_UnknownTitleLeftAsIs(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/shape-up.md",
|
||||||
|
Content: "---\ntitle: 'Shape Up'\n---\n\n## Summary\n\nSee [[Ghost Concept]].\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{}
|
||||||
|
got, warnings := CanonicalizeLinks(pages, inventory)
|
||||||
|
require.Len(t, got, 1)
|
||||||
|
assert.NotEmpty(t, warnings)
|
||||||
|
assert.Contains(t, got[0].Content, "[[Ghost Concept]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCanonicalizeLinks_AlreadyCanonicalLinkUntouched(t *testing.T) {
|
||||||
|
// Links already in [[slug|Display]] format must not be double-converted
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/shape-up.md",
|
||||||
|
Content: "---\ntitle: 'Shape Up'\n---\n\n## Summary\n\nSee [[betting|Betting]].\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeConcept: {
|
||||||
|
{Slug: "betting", Title: "Betting"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got, warnings := CanonicalizeLinks(pages, inventory)
|
||||||
|
require.Len(t, got, 1)
|
||||||
|
assert.Empty(t, warnings)
|
||||||
|
// Should remain exactly as-is — not double-wrapped
|
||||||
|
assert.Contains(t, got[0].Content, "[[betting|Betting]]")
|
||||||
|
assert.NotContains(t, got[0].Content, "[[betting|[[betting|Betting]]]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCanonicalizeLinks_CaseInsensitiveMatch(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/foo.md",
|
||||||
|
Content: "---\ntitle: 'Foo'\n---\n\n## Summary\n\nSee [[domain driven design]].\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeConcept: {
|
||||||
|
{Slug: "domain-driven-design", Title: "Domain Driven Design"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got, warnings := CanonicalizeLinks(pages, inventory)
|
||||||
|
require.Len(t, got, 1)
|
||||||
|
assert.Empty(t, warnings)
|
||||||
|
assert.Contains(t, got[0].Content, "[[domain-driven-design|domain driven design]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCanonicalizeLinks_CurrentBatchPagesResolved(t *testing.T) {
|
||||||
|
// A concept created in the same batch should be canonicalizable
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/shape-up.md",
|
||||||
|
Content: "---\ntitle: 'Shape Up'\n---\n\n## Summary\n\nSee [[Betting]].\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "wiki/concepts/betting.md",
|
||||||
|
Content: "---\ntitle: 'Betting'\n---\n\n## Definition\n\nA technique.\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{} // empty — Betting is in the batch, not inventory
|
||||||
|
|
||||||
|
got, warnings := CanonicalizeLinks(pages, inventory)
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
assert.Empty(t, warnings)
|
||||||
|
assert.Contains(t, got[0].Content, "[[betting|Betting]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCanonicalizeLinks_MultipleLinksInOnePage(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/foo.md",
|
||||||
|
Content: "---\ntitle: 'Foo'\n---\n\n## Summary\n\nSee [[Betting]] and [[Shape Up]].\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeConcept: {
|
||||||
|
{Slug: "betting", Title: "Betting"},
|
||||||
|
},
|
||||||
|
wiki.PageTypeSource: {
|
||||||
|
{Slug: "shape-up", Title: "Shape Up"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got, warnings := CanonicalizeLinks(pages, inventory)
|
||||||
|
require.Len(t, got, 1)
|
||||||
|
assert.Empty(t, warnings)
|
||||||
|
assert.Contains(t, got[0].Content, "[[betting|Betting]]")
|
||||||
|
assert.Contains(t, got[0].Content, "[[shape-up|Shape Up]]")
|
||||||
|
}
|
||||||
110
ingestion/internal/pipeline/parse.go
Normal file
110
ingestion/internal/pipeline/parse.go
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
// ingestion/internal/pipeline/parse.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RawPage is one page as emitted by the LLM: a small structured record that
// carries neither a path nor frontmatter. Slugs, paths and frontmatter are
// derived from these fields by the pipeline.
type RawPage struct {
	// Title is decoded from the "title" key.
	Title string `json:"title"`

	// Type is "source", "concept" or "entity".
	Type string `json:"type"`

	// Subtype refines Type.
	// For an entity: person|company|tool|model|framework|technology.
	// For a source: article|pdf|book|video|note|project.
	Subtype string `json:"subtype"`

	// Domain is decoded from the "domain" key.
	Domain string `json:"domain"`

	// Content is the Markdown body only, without frontmatter.
	Content string `json:"content"`
}
|
||||||
|
|
||||||
|
// ParseRawPages parses LLM output as a JSON array of RawPage objects.
|
||||||
|
// If the output contains invalid JSON escape sequences (e.g. \. from Markdown),
|
||||||
|
// it attempts repair before falling back to truncation recovery.
|
||||||
|
func ParseRawPages(output string) ([]RawPage, []string) {
|
||||||
|
output = strings.TrimSpace(output)
|
||||||
|
if output == "" {
|
||||||
|
return nil, []string{"LLM returned empty output"}
|
||||||
|
}
|
||||||
|
|
||||||
|
output = stripFences(output)
|
||||||
|
|
||||||
|
// Fast path: valid JSON.
|
||||||
|
var pages []RawPage
|
||||||
|
if err := json.Unmarshal([]byte(output), &pages); err == nil {
|
||||||
|
return pages, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Repair pass: fix invalid escape sequences (e.g. \. \d from Markdown content).
|
||||||
|
repaired := repairJSON(output)
|
||||||
|
if err := json.Unmarshal([]byte(repaired), &pages); err == nil {
|
||||||
|
return pages, []string{"repaired invalid JSON escape sequences in LLM output"}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Truncation recovery: find last `}` that closes a complete object.
|
||||||
|
idx := strings.LastIndex(repaired, "}")
|
||||||
|
if idx < 0 {
|
||||||
|
return nil, []string{"LLM output contained no complete JSON objects"}
|
||||||
|
}
|
||||||
|
|
||||||
|
start := strings.Index(repaired, "[")
|
||||||
|
if start < 0 {
|
||||||
|
return nil, []string{"LLM output contained no JSON array opening bracket"}
|
||||||
|
}
|
||||||
|
|
||||||
|
candidate := repaired[start:idx+1] + "]"
|
||||||
|
if err := json.Unmarshal([]byte(candidate), &pages); err != nil {
|
||||||
|
return nil, []string{fmt.Sprintf("truncation recovery failed: %v", err)}
|
||||||
|
}
|
||||||
|
|
||||||
|
return pages, []string{fmt.Sprintf("LLM output was truncated; recovered %d page(s)", len(pages))}
|
||||||
|
}
|
||||||
|
|
||||||
|
// repairJSON returns s with every invalid JSON escape sequence (e.g. \. \d \p)
// turned into an escaped backslash followed by the same character.
//
// Valid escapes are copied unchanged: \" \\ \/ \b \f \n \r \t, and \u only
// when it is followed by four hexadecimal digits. A \u without them (as in
// "\users") is not a valid escape and gets its backslash doubled like any
// other invalid sequence. A trailing lone backslash is copied as-is.
//
// The input is scanned byte by byte and escapes are consumed in pairs, so the
// second backslash of an existing \\ is never mistaken for the start of a new
// escape.
func repairJSON(s string) string {
	var b strings.Builder
	b.Grow(len(s))

	for i := 0; i < len(s); {
		c := s[i]
		if c != '\\' || i+1 >= len(s) {
			// Ordinary byte, or a trailing backslash with nothing after it.
			b.WriteByte(c)
			i++
			continue
		}

		next := s[i+1]
		if !isJSONEscapeAt(s, i) {
			// Invalid escape: double the backslash so it decodes literally.
			b.WriteByte('\\')
		}
		b.WriteByte('\\')
		b.WriteByte(next)
		i += 2
	}
	return b.String()
}

// isJSONEscapeAt reports whether the backslash at s[i] starts a valid JSON
// escape sequence. The caller guarantees that s[i+1] exists.
func isJSONEscapeAt(s string, i int) bool {
	switch s[i+1] {
	case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
		return true
	case 'u':
		// \u must be followed by exactly four hex digits.
		if i+6 > len(s) {
			return false
		}
		for _, h := range []byte(s[i+2 : i+6]) {
			isHex := (h >= '0' && h <= '9') || (h >= 'a' && h <= 'f') || (h >= 'A' && h <= 'F')
			if !isHex {
				return false
			}
		}
		return true
	default:
		return false
	}
}
|
||||||
|
|
||||||
|
// stripFences removes a Markdown code fence wrapped around s. If s starts
// with an opening fence line ("```json" or "```", ending in LF or CRLF), that
// line is dropped, the remainder is trimmed, a closing "```" is removed when
// present, and the result is trimmed again. Any other input is returned
// unchanged.
func stripFences(s string) string {
	openers := [...]string{"```json\n", "```json\r\n", "```\n", "```\r\n"}
	for i := range openers {
		if !strings.HasPrefix(s, openers[i]) {
			continue
		}
		inner := strings.TrimSpace(s[len(openers[i]):])
		return strings.TrimSpace(strings.TrimSuffix(inner, "```"))
	}
	return s
}
|
||||||
87
ingestion/internal/pipeline/parse_test.go
Normal file
87
ingestion/internal/pipeline/parse_test.go
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
// ingestion/internal/pipeline/parse_test.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParseRawPages_ValidJSON(t *testing.T) {
|
||||||
|
input := `[{"title":"Shape Up","type":"source","subtype":"book","domain":"product-strategy","content":"## Summary\n\nFoo."},{"title":"Betting","type":"concept","content":"## Definition\n\nA technique."}]`
|
||||||
|
pages, warnings := ParseRawPages(input)
|
||||||
|
require.Len(t, pages, 2)
|
||||||
|
assert.Empty(t, warnings)
|
||||||
|
assert.Equal(t, "Shape Up", pages[0].Title)
|
||||||
|
assert.Equal(t, "source", pages[0].Type)
|
||||||
|
assert.Equal(t, "book", pages[0].Subtype)
|
||||||
|
assert.Equal(t, "product-strategy", pages[0].Domain)
|
||||||
|
assert.Equal(t, "Betting", pages[1].Title)
|
||||||
|
assert.Equal(t, "concept", pages[1].Type)
|
||||||
|
assert.Empty(t, pages[1].Subtype)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseRawPages_StripsFences(t *testing.T) {
|
||||||
|
input := "```json\n[{\"title\":\"Foo\",\"type\":\"concept\",\"content\":\"## Definition\\n\\nFoo.\"}]\n```"
|
||||||
|
pages, warnings := ParseRawPages(input)
|
||||||
|
require.Len(t, pages, 1)
|
||||||
|
assert.Empty(t, warnings)
|
||||||
|
assert.Equal(t, "Foo", pages[0].Title)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseRawPages_TruncationRecovery(t *testing.T) {
|
||||||
|
input := `[{"title":"Foo","type":"concept","content":"## Definition\n\nFoo."},{"title":"Bar","type":"concept","content":"trunc`
|
||||||
|
pages, warnings := ParseRawPages(input)
|
||||||
|
require.Len(t, pages, 1)
|
||||||
|
assert.Equal(t, "Foo", pages[0].Title)
|
||||||
|
assert.NotEmpty(t, warnings)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseRawPages_EmptyInput(t *testing.T) {
|
||||||
|
pages, warnings := ParseRawPages("")
|
||||||
|
assert.Empty(t, pages)
|
||||||
|
assert.NotEmpty(t, warnings)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseRawPages_PlainFence(t *testing.T) {
|
||||||
|
input := "```\n[{\"title\":\"Foo\",\"type\":\"concept\",\"content\":\"ok\"}]\n```"
|
||||||
|
pages, warnings := ParseRawPages(input)
|
||||||
|
require.Len(t, pages, 1)
|
||||||
|
assert.Empty(t, warnings)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseRawPages_MissingTitle(t *testing.T) {
|
||||||
|
// Missing title — still parsed, Title is empty string
|
||||||
|
input := `[{"type":"concept","content":"## Definition\n\nFoo."}]`
|
||||||
|
pages, warnings := ParseRawPages(input)
|
||||||
|
require.Len(t, pages, 1)
|
||||||
|
assert.Empty(t, warnings)
|
||||||
|
assert.Empty(t, pages[0].Title)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseRawPages_InvalidEscapeRepaired(t *testing.T) {
|
||||||
|
// LLM copied markdown escaped list numbers (\.) into JSON — invalid escape
|
||||||
|
raw := "[{\"title\":\"Foo\",\"type\":\"concept\",\"content\":\"Step 4\\. Do it.\"}]"
|
||||||
|
pages, warnings := ParseRawPages(raw)
|
||||||
|
require.Len(t, pages, 1)
|
||||||
|
assert.Equal(t, "Foo", pages[0].Title)
|
||||||
|
assert.Contains(t, pages[0].Content, `4\.`)
|
||||||
|
assert.NotEmpty(t, warnings) // repair warning
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRepairJSON_FixesInvalidEscapes(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
in string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{`{"a":"foo\.bar"}`, `{"a":"foo\\.bar"}`},
|
||||||
|
{`{"a":"\\n is fine"}`, `{"a":"\\n is fine"}`}, // valid \n untouched
|
||||||
|
{`{"a":"\d+ items"}`, `{"a":"\\d+ items"}`},
|
||||||
|
{`{"a":"already \\ escaped"}`, `{"a":"already \\ escaped"}`}, // valid \\ untouched
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
got := repairJSON(tc.in)
|
||||||
|
assert.Equal(t, tc.want, got, "input: %s", tc.in)
|
||||||
|
}
|
||||||
|
}
|
||||||
126
ingestion/internal/pipeline/pipeline.go
Normal file
126
ingestion/internal/pipeline/pipeline.go
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
// ingestion/internal/pipeline/pipeline.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CompleteFunc performs one LLM call: it receives a system prompt and a user
// prompt and returns the model's text output or an error.
type CompleteFunc func(ctx context.Context, system, user string) (string, error)
|
||||||
|
|
||||||
|
// Config holds pipeline configuration.
|
||||||
|
type Config struct {
|
||||||
|
Complete CompleteFunc
|
||||||
|
ChunkSize int // 0 = no chunking
|
||||||
|
Schema string // overrides brain/schema.md when set (useful in tests)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Result reports what a pipeline run produced.
type Result struct {
	// Pages lists the relative paths that were written, or that would have
	// been written in dry-run mode.
	Pages []string

	// Warnings collects the non-fatal problems met during the run.
	Warnings []string
}
|
||||||
|
|
||||||
|
// Run ingests content and writes structured wiki pages to brainDir/wiki/.
|
||||||
|
// In dry-run mode, pages are returned but not written to disk.
|
||||||
|
func Run(ctx context.Context, cfg Config, brainDir, content, source string, dryRun bool) (Result, error) {
|
||||||
|
inventory, err := wiki.LoadInventory(brainDir)
|
||||||
|
if err != nil {
|
||||||
|
return Result{}, fmt.Errorf("load inventory: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
schema := cfg.Schema
|
||||||
|
if schema == "" {
|
||||||
|
schema = loadSchema(brainDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
sourceSlug := wiki.Slug(source)
|
||||||
|
date := time.Now().UTC().Format("2006-01-02")
|
||||||
|
chunks := Chunk(content, cfg.ChunkSize)
|
||||||
|
|
||||||
|
var allRaw []RawPage
|
||||||
|
var allWarnings []string
|
||||||
|
|
||||||
|
for _, chunk := range chunks {
|
||||||
|
userPrompt := BuildPrompt(schema, source, chunk, inventory)
|
||||||
|
output, err := cfg.Complete(ctx, systemPrompt, userPrompt)
|
||||||
|
if err != nil {
|
||||||
|
return Result{}, fmt.Errorf("LLM call: %w", err)
|
||||||
|
}
|
||||||
|
raw, warnings := ParseRawPages(output)
|
||||||
|
allRaw = append(allRaw, raw...)
|
||||||
|
allWarnings = append(allWarnings, warnings...)
|
||||||
|
}
|
||||||
|
|
||||||
|
pages, buildWarnings := BuildPages(allRaw, sourceSlug, date)
|
||||||
|
allWarnings = append(allWarnings, buildWarnings...)
|
||||||
|
resolved := Resolve(pages, inventory)
|
||||||
|
canonicalized, linkWarnings := CanonicalizeLinks(resolved, inventory)
|
||||||
|
allWarnings = append(allWarnings, linkWarnings...)
|
||||||
|
withRefs := injectSourceRefs(canonicalized, inventory, brainDir)
|
||||||
|
merged := mergeAll(withRefs)
|
||||||
|
|
||||||
|
var written []string
|
||||||
|
for _, page := range merged {
|
||||||
|
if !dryRun {
|
||||||
|
dest := filepath.Join(brainDir, filepath.FromSlash(page.Path))
|
||||||
|
if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
|
||||||
|
return Result{}, fmt.Errorf("mkdir for %s: %w", page.Path, err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(dest, []byte(page.Content), 0o644); err != nil {
|
||||||
|
return Result{}, fmt.Errorf("write %s: %w", page.Path, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
written = append(written, page.Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !dryRun {
|
||||||
|
if err := wiki.RebuildIndex(brainDir, date); err != nil {
|
||||||
|
allWarnings = append(allWarnings, fmt.Sprintf("rebuild index: %v", err))
|
||||||
|
}
|
||||||
|
if err := wiki.AppendLog(brainDir, source, written, allWarnings, date); err != nil {
|
||||||
|
allWarnings = append(allWarnings, fmt.Sprintf("append log: %v", err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Result{Pages: written, Warnings: allWarnings}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// mergeAll deduplicates pages by path, merging content from later occurrences.
|
||||||
|
func mergeAll(pages []wiki.Page) []wiki.Page {
|
||||||
|
order := make([]string, 0, len(pages))
|
||||||
|
byPath := make(map[string]wiki.Page, len(pages))
|
||||||
|
for _, p := range pages {
|
||||||
|
if _, seen := byPath[p.Path]; !seen {
|
||||||
|
order = append(order, p.Path)
|
||||||
|
byPath[p.Path] = p
|
||||||
|
} else {
|
||||||
|
byPath[p.Path] = wiki.Merge(byPath[p.Path], p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result := make([]wiki.Page, 0, len(order))
|
||||||
|
for _, path := range order {
|
||||||
|
result = append(result, byPath[path])
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// defaultSchema is the minimal schema text used when no usable schema.md is
// found in the brain directory.
const defaultSchema = `# Brain Wiki Schema
Three page types: wiki/sources/, wiki/concepts/, wiki/entities/.
See brain/schema.md for the full schema.
`

// loadSchema returns the trimmed contents of schema.md inside brainDir.
//
// It falls back to defaultSchema when the file cannot be read or when it
// contains only whitespace, so the prompt never ends up with an empty schema
// section. Read errors are deliberately not reported: the schema file is
// optional.
func loadSchema(brainDir string) string {
	b, err := os.ReadFile(filepath.Join(brainDir, "schema.md"))
	if err != nil {
		return defaultSchema
	}
	if schema := strings.TrimSpace(string(b)); schema != "" {
		return schema
	}
	return defaultSchema
}
|
||||||
139
ingestion/internal/pipeline/pipeline_test.go
Normal file
139
ingestion/internal/pipeline/pipeline_test.go
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
// ingestion/internal/pipeline/pipeline_test.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/llm"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRun_WritesPages(t *testing.T) {
|
||||||
|
brainDir := t.TempDir()
|
||||||
|
for _, sub := range []string{"wiki/concepts", "wiki/entities", "wiki/sources"} {
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(brainDir, sub), 0o755))
|
||||||
|
}
|
||||||
|
|
||||||
|
llmResponse := mustJSON([]RawPage{
|
||||||
|
{
|
||||||
|
Title: "Test Article",
|
||||||
|
Type: "source",
|
||||||
|
Subtype: "article",
|
||||||
|
Domain: "software-engineering",
|
||||||
|
Content: "## Summary\n\nA test article.\n\n## Key Claims\n\n- It tests things.\n\n## Concepts Introduced or Reinforced\n\n[[Testing]]\n\n## Entities Mentioned\n\n## Open Questions Raised\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Title: "Testing",
|
||||||
|
Type: "concept",
|
||||||
|
Domain: "software-engineering",
|
||||||
|
Content: "## Definition\n\nThe practice of verifying software.\n\n## Why It Matters\n\nCatches bugs.\n\n## Related Concepts\n\n## Related Entities\n\n## Sources\n\n## Evolving Notes\n",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"choices": []map[string]any{
|
||||||
|
{"message": map[string]any{"role": "assistant", "content": llmResponse}},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
cfg := Config{
|
||||||
|
Complete: llm.New(srv.URL, "", "test-model", 30*time.Second).Complete,
|
||||||
|
ChunkSize: 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := Run(context.Background(), cfg, brainDir, "An article about testing.", "test-article", false)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Len(t, result.Pages, 2)
|
||||||
|
|
||||||
|
_, err = os.Stat(filepath.Join(brainDir, "wiki", "sources", "test-article.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
_, err = os.Stat(filepath.Join(brainDir, "wiki", "concepts", "testing.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
_, err = os.Stat(filepath.Join(brainDir, "wiki", "index.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
_, err = os.Stat(filepath.Join(brainDir, "log.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRun_DryRunDoesNotWrite(t *testing.T) {
|
||||||
|
brainDir := t.TempDir()
|
||||||
|
for _, sub := range []string{"wiki/concepts", "wiki/entities", "wiki/sources"} {
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(brainDir, sub), 0o755))
|
||||||
|
}
|
||||||
|
|
||||||
|
llmResponse := mustJSON([]RawPage{{
|
||||||
|
Title: "Foo",
|
||||||
|
Type: "source",
|
||||||
|
Subtype: "article",
|
||||||
|
Content: "## Summary\n\nFoo.\n",
|
||||||
|
}})
|
||||||
|
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"choices": []map[string]any{{"message": map[string]any{"content": llmResponse}}},
|
||||||
|
})
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
cfg := Config{Complete: llm.New(srv.URL, "", "m", 30*time.Second).Complete}
|
||||||
|
result, err := Run(context.Background(), cfg, brainDir, "foo content", "foo", true)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Len(t, result.Pages, 1)
|
||||||
|
|
||||||
|
_, err = os.Stat(filepath.Join(brainDir, "wiki", "sources", "foo.md"))
|
||||||
|
assert.True(t, os.IsNotExist(err))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRun_MergesDuplicatePaths(t *testing.T) {
|
||||||
|
brainDir := t.TempDir()
|
||||||
|
for _, sub := range []string{"wiki/concepts", "wiki/entities", "wiki/sources"} {
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(brainDir, sub), 0o755))
|
||||||
|
}
|
||||||
|
|
||||||
|
// LLM returns same title twice (simulates multi-chunk duplicate)
|
||||||
|
llmResponse := mustJSON([]RawPage{
|
||||||
|
{Title: "Foo", Type: "concept", Content: "## Definition\n\nFirst.\n\n## Related Concepts\n\n[[Bar]]\n"},
|
||||||
|
{Title: "Foo", Type: "concept", Content: "## Definition\n\nSecond.\n\n## Related Concepts\n\n[[Baz]]\n"},
|
||||||
|
})
|
||||||
|
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"choices": []map[string]any{{"message": map[string]any{"content": llmResponse}}},
|
||||||
|
})
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
cfg := Config{Complete: llm.New(srv.URL, "", "m", 30*time.Second).Complete}
|
||||||
|
result, err := Run(context.Background(), cfg, brainDir, "content", "foo", false)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Len(t, result.Pages, 1) // deduplicated
|
||||||
|
|
||||||
|
content, err := os.ReadFile(filepath.Join(brainDir, "wiki", "concepts", "foo.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
// keep-first for Definition, union for Related Concepts
|
||||||
|
assert.Contains(t, string(content), "First.")
|
||||||
|
// Bar and Baz unknown in empty inventory → left as plain [[links]]
|
||||||
|
assert.Contains(t, string(content), "[[Bar]]")
|
||||||
|
assert.Contains(t, string(content), "[[Baz]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
// mustJSON marshals v to a JSON string and panics if marshalling fails.
// It is a test helper for building fixture payloads.
func mustJSON(v any) string {
	encoded, err := json.Marshal(v)
	if err == nil {
		return string(encoded)
	}
	panic(err)
}
|
||||||
63
ingestion/internal/pipeline/prompt.go
Normal file
63
ingestion/internal/pipeline/prompt.go
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
// ingestion/internal/pipeline/prompt.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
// systemPrompt is the system message sent with every LLM call. It describes
// the required output (a bare JSON array), the fields of each element, and
// the wikilink format expected inside page content.
const systemPrompt = `You are a wiki agent. Read the source material and produce structured wiki pages following the schema provided.

Output ONLY a valid JSON array — no markdown fences, no other text before or after.
Each element must have exactly these fields:
"title" — exact page title (e.g. "FinBERT", "Ryan Singer", "Shape Up")
"type" — exactly one of: "source", "concept", "entity"
"subtype" — for source: article|pdf|book|video|note|project; for entity: person|company|tool|model|framework|technology; omit for concept
"domain" — one of the domains in the schema (omit if none fits)
"content" — Markdown body only — NO frontmatter, NO path, NO slug

Wikilinks in content: [[Display Name]] — just the display name, no slug, no pipe separator.
Only link to pages listed in the inventory or pages you are creating in this response.`
|
||||||
|
|
||||||
|
// BuildPrompt constructs the user prompt for a single chunk.
|
||||||
|
func BuildPrompt(schema, source, content string, inventory map[wiki.PageType][]wiki.Entry) string {
|
||||||
|
var sb strings.Builder
|
||||||
|
|
||||||
|
fmt.Fprintf(&sb, "Today's date is %s.\n\n", time.Now().UTC().Format("2006-01-02"))
|
||||||
|
|
||||||
|
sb.WriteString("## Schema\n\n")
|
||||||
|
sb.WriteString(schema)
|
||||||
|
sb.WriteString("\n\n")
|
||||||
|
|
||||||
|
sb.WriteString("## Existing wiki pages\n\n")
|
||||||
|
sb.WriteString("Reference these pages by display name only — [[Display Name]] — in your content.\n\n")
|
||||||
|
|
||||||
|
for _, pt := range []wiki.PageType{wiki.PageTypeConcept, wiki.PageTypeEntity, wiki.PageTypeSource} {
|
||||||
|
entries := inventory[pt]
|
||||||
|
label := strings.ToUpper(string(pt)[:1]) + string(pt)[1:]
|
||||||
|
if len(entries) == 0 {
|
||||||
|
fmt.Fprintf(&sb, "%s — (none yet)\n\n", label)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&sb, "%s:\n", label)
|
||||||
|
for _, e := range entries {
|
||||||
|
fmt.Fprintf(&sb, " - %s\n", e.Title)
|
||||||
|
}
|
||||||
|
sb.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
sb.WriteString("## Non-negotiable rules\n\n")
|
||||||
|
sb.WriteString("1. Output ONLY a valid JSON array — no prose, no fences.\n")
|
||||||
|
sb.WriteString("2. Fields: title, type, subtype (if applicable), domain (if applicable), content.\n")
|
||||||
|
sb.WriteString("3. Wikilinks: [[Display Name]] — no slug, no pipe. The pipeline handles slugs.\n")
|
||||||
|
sb.WriteString("4. Section links must match their section type (Related Concepts → concepts only, etc.).\n")
|
||||||
|
sb.WriteString("5. One source page per book — if inventory shows it exists, return it as an UPDATE.\n\n")
|
||||||
|
|
||||||
|
fmt.Fprintf(&sb, "## Source: %s\n\n", source)
|
||||||
|
sb.WriteString(content)
|
||||||
|
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
115
ingestion/internal/pipeline/refs.go
Normal file
115
ingestion/internal/pipeline/refs.go
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
// ingestion/internal/pipeline/refs.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
// wikilinkRE matches the opening of a piped wikilink ("[[slug|") and captures
// the text between "[[" and the pipe as submatch 1.
var wikilinkRE = regexp.MustCompile(`\[\[([^|\]]+)\|`)
|
||||||
|
|
||||||
|
// injectSourceRefs finds the source page in the proposed batch, extracts its
|
||||||
|
// wikilinks, and injects a back-reference into every linked concept or entity page.
|
||||||
|
// Pages that exist on disk but are not in the current batch are loaded and
|
||||||
|
// appended so they will be updated on write.
|
||||||
|
func injectSourceRefs(pages []wiki.Page, inventory map[wiki.PageType][]wiki.Entry, brainDir string) []wiki.Page {
|
||||||
|
sourceSlug, sourceTitle, found := findSourcePage(pages)
|
||||||
|
if !found {
|
||||||
|
return pages
|
||||||
|
}
|
||||||
|
|
||||||
|
var sourceContent string
|
||||||
|
for _, p := range pages {
|
||||||
|
if strings.HasPrefix(p.Path, "wiki/sources/") &&
|
||||||
|
strings.TrimSuffix(filepath.Base(p.Path), ".md") == sourceSlug {
|
||||||
|
sourceContent = p.Content
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
linkedSlugs := extractWikilinks(sourceContent)
|
||||||
|
sourceRef := "- [[" + sourceSlug + "|" + sourceTitle + "]]"
|
||||||
|
|
||||||
|
bySlug := make(map[string]int, len(pages))
|
||||||
|
for i, p := range pages {
|
||||||
|
if !strings.HasPrefix(p.Path, "wiki/sources/") {
|
||||||
|
bySlug[strings.TrimSuffix(filepath.Base(p.Path), ".md")] = i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for slug := range linkedSlugs {
|
||||||
|
if slug == sourceSlug {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if idx, ok := bySlug[slug]; ok {
|
||||||
|
pages[idx] = addSourceRef(pages[idx], sourceRef)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
pt, ok := findInInventory(slug, inventory)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
diskPath := filepath.Join(brainDir, "wiki", string(pt), slug+".md")
|
||||||
|
b, err := os.ReadFile(diskPath)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
page := wiki.Page{
|
||||||
|
Path: "wiki/" + string(pt) + "/" + slug + ".md",
|
||||||
|
Content: string(b),
|
||||||
|
}
|
||||||
|
pages = append(pages, addSourceRef(page, sourceRef))
|
||||||
|
}
|
||||||
|
|
||||||
|
return pages
|
||||||
|
}
|
||||||
|
|
||||||
|
// addSourceRef injects sourceRef into the ## Sources bullet section of page
|
||||||
|
// using wiki.Merge, which deduplicates bullets automatically.
|
||||||
|
func addSourceRef(page wiki.Page, sourceRef string) wiki.Page {
|
||||||
|
patch := wiki.Page{
|
||||||
|
Path: page.Path,
|
||||||
|
Content: "\n## Sources\n\n" + sourceRef + "\n",
|
||||||
|
}
|
||||||
|
return wiki.Merge(page, patch)
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractWikilinks returns the set of slugs referenced as [[slug|...]] in content.
|
||||||
|
func extractWikilinks(content string) map[string]bool {
|
||||||
|
slugs := make(map[string]bool)
|
||||||
|
for _, m := range wikilinkRE.FindAllStringSubmatch(content, -1) {
|
||||||
|
slugs[m[1]] = true
|
||||||
|
}
|
||||||
|
return slugs
|
||||||
|
}
|
||||||
|
|
||||||
|
// findSourcePage returns the slug and title of the first wiki/sources/ page in pages.
|
||||||
|
func findSourcePage(pages []wiki.Page) (slug, title string, found bool) {
|
||||||
|
for _, p := range pages {
|
||||||
|
if strings.HasPrefix(p.Path, "wiki/sources/") {
|
||||||
|
slug = strings.TrimSuffix(filepath.Base(p.Path), ".md")
|
||||||
|
title = extractTitle(p.Content)
|
||||||
|
if title == "" {
|
||||||
|
title = slug
|
||||||
|
}
|
||||||
|
return slug, title, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "", "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
// findInInventory returns the PageType for a slug if it appears in the inventory.
|
||||||
|
func findInInventory(slug string, inventory map[wiki.PageType][]wiki.Entry) (wiki.PageType, bool) {
|
||||||
|
for pt, entries := range inventory {
|
||||||
|
for _, e := range entries {
|
||||||
|
if e.Slug == slug {
|
||||||
|
return pt, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
172
ingestion/internal/pipeline/refs_test.go
Normal file
172
ingestion/internal/pipeline/refs_test.go
Normal file
@@ -0,0 +1,172 @@
|
|||||||
|
// ingestion/internal/pipeline/refs_test.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
func makeInventory(concepts, entities []string) map[wiki.PageType][]wiki.Entry {
|
||||||
|
inv := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeConcept: {},
|
||||||
|
wiki.PageTypeEntity: {},
|
||||||
|
wiki.PageTypeSource: {},
|
||||||
|
}
|
||||||
|
for _, slug := range concepts {
|
||||||
|
inv[wiki.PageTypeConcept] = append(inv[wiki.PageTypeConcept], wiki.Entry{Slug: slug, Title: slug})
|
||||||
|
}
|
||||||
|
for _, slug := range entities {
|
||||||
|
inv[wiki.PageTypeEntity] = append(inv[wiki.PageTypeEntity], wiki.Entry{Slug: slug, Title: slug})
|
||||||
|
}
|
||||||
|
return inv
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_NoSourcePage(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Definition\n\nFoo.\n"},
|
||||||
|
}
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
assert.Equal(t, pages, got)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_InjectsIntoProposedConcept(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/my-article.md",
|
||||||
|
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[domain-driven-design|Domain Driven Design]].\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "wiki/concepts/domain-driven-design.md",
|
||||||
|
Content: "---\ntitle: Domain Driven Design\n---\n\n## Definition\n\nA methodology.\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
assert.Contains(t, got[1].Content, "## Sources")
|
||||||
|
assert.Contains(t, got[1].Content, "[[my-article|My Article]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_LoadsConceptFromDisk(t *testing.T) {
|
||||||
|
brainDir := t.TempDir()
|
||||||
|
conceptDir := filepath.Join(brainDir, "wiki", "concepts")
|
||||||
|
require.NoError(t, os.MkdirAll(conceptDir, 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(
|
||||||
|
filepath.Join(conceptDir, "shape-up.md"),
|
||||||
|
[]byte("---\ntitle: Shape Up\n---\n\n## Definition\n\nA methodology.\n"),
|
||||||
|
0o644,
|
||||||
|
))
|
||||||
|
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/my-article.md",
|
||||||
|
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[shape-up|Shape Up]].\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
inv := makeInventory([]string{"shape-up"}, nil)
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, inv, brainDir)
|
||||||
|
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
var conceptPage wiki.Page
|
||||||
|
for _, p := range got {
|
||||||
|
if p.Path == "wiki/concepts/shape-up.md" {
|
||||||
|
conceptPage = p
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.Contains(t, conceptPage.Content, "## Sources")
|
||||||
|
assert.Contains(t, conceptPage.Content, "[[my-article|My Article]]")
|
||||||
|
assert.Contains(t, conceptPage.Content, "## Definition")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_NoSelfReference(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/my-article.md",
|
||||||
|
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSelf-link [[my-article|My Article]].\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_DeduplicatesOnReingestion(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/my-article.md",
|
||||||
|
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[ddd|DDD]].\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "wiki/concepts/ddd.md",
|
||||||
|
Content: "---\ntitle: DDD\n---\n\n## Definition\n\nA thing.\n\n## Sources\n\n- [[my-article|My Article]]\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
count := 0
|
||||||
|
for _, line := range splitLines(got[1].Content) {
|
||||||
|
if line == "- [[my-article|My Article]]" {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.Equal(t, 1, count, "source ref should appear exactly once")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_InjectsIntoEntity(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/book.md",
|
||||||
|
Content: "---\ntitle: Book\n---\n\n## Summary\n\nBy [[ryan-singer|Ryan Singer]].\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "wiki/entities/ryan-singer.md",
|
||||||
|
Content: "---\ntitle: Ryan Singer\n---\n\n## Description\n\nA designer.\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
var entity wiki.Page
|
||||||
|
for _, p := range got {
|
||||||
|
if p.Path == "wiki/entities/ryan-singer.md" {
|
||||||
|
entity = p
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.Contains(t, entity.Content, "[[book|Book]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExtractWikilinks(t *testing.T) {
|
||||||
|
content := "See [[foo|Foo]] and [[bar|Bar]] and [[foo|Foo again]]."
|
||||||
|
got := extractWikilinks(content)
|
||||||
|
assert.True(t, got["foo"])
|
||||||
|
assert.True(t, got["bar"])
|
||||||
|
assert.Len(t, got, 2, "duplicate slugs should be deduplicated")
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitLines cuts s at every '\n' and returns the non-empty pieces in order.
// Empty lines are dropped; the result is nil when there is nothing to return.
func splitLines(s string) []string {
	var lines []string
	begin := 0
	for pos, r := range s {
		if r != '\n' {
			continue
		}
		if pos > begin {
			lines = append(lines, s[begin:pos])
		}
		begin = pos + 1
	}
	// Whatever follows the last newline is a final, unterminated line.
	if begin < len(s) {
		lines = append(lines, s[begin:])
	}
	return lines
}
|
||||||
88
ingestion/internal/pipeline/resolve.go
Normal file
88
ingestion/internal/pipeline/resolve.go
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
// ingestion/internal/pipeline/resolve.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Resolve remaps proposed pages to existing slugs when a fuzzy title match is found.
|
||||||
|
// It only matches within the same page type (entities→entities, concepts→concepts).
|
||||||
|
// Pages with no inventory match are returned unchanged.
|
||||||
|
func Resolve(proposed []wiki.Page, inventory map[wiki.PageType][]wiki.Entry) []wiki.Page {
|
||||||
|
type key struct {
|
||||||
|
pt wiki.PageType
|
||||||
|
normalized string
|
||||||
|
}
|
||||||
|
lookup := make(map[key]string) // key → canonical slug
|
||||||
|
for pt, entries := range inventory {
|
||||||
|
for _, e := range entries {
|
||||||
|
k := key{pt: pt, normalized: normalizeTitle(e.Title)}
|
||||||
|
lookup[k] = e.Slug
|
||||||
|
for _, alias := range e.Aliases {
|
||||||
|
ak := key{pt: pt, normalized: normalizeTitle(alias)}
|
||||||
|
if _, exists := lookup[ak]; !exists {
|
||||||
|
lookup[ak] = e.Slug
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out := make([]wiki.Page, 0, len(proposed))
|
||||||
|
for _, page := range proposed {
|
||||||
|
pt := pageTypeFromPath(page.Path)
|
||||||
|
title := extractTitle(page.Content)
|
||||||
|
k := key{pt: pt, normalized: normalizeTitle(title)}
|
||||||
|
if canonicalSlug, ok := lookup[k]; ok {
|
||||||
|
dir := filepath.Dir(page.Path)
|
||||||
|
page.Path = dir + "/" + canonicalSlug + ".md"
|
||||||
|
}
|
||||||
|
out = append(out, page)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeTitle reduces a title to a canonical form for fuzzy comparison:
// it lowercases the text, drops ONE leading article ("the", "a", "an") when
// more words follow, turns hyphens into spaces and collapses all runs of
// whitespace into single spaces.
//
//	"The Shape Up Method" → "shape up method"
//	"The A Team"          → "a team"
//	"shape-up"            → "shape up"
//
// Only a whole first word counts as an article, so "A-Frame" becomes
// "a frame" and a title that is just "The" stays "the".
func normalizeTitle(s string) string {
	words := strings.Fields(strings.ToLower(s))

	// Strip at most one article; stripping repeatedly would conflate
	// "The A Team" with "Team" and produce false matches.
	if len(words) > 1 {
		switch words[0] {
		case "the", "a", "an":
			words = words[1:]
		}
	}

	// Replacing hyphens can introduce new runs of spaces ("a - b"), so the
	// whitespace is collapsed once more afterwards.
	spaced := strings.ReplaceAll(strings.Join(words, " "), "-", " ")
	return strings.Join(strings.Fields(spaced), " ")
}
|
||||||
|
|
||||||
|
// pageTypeFromPath extracts the wiki.PageType from a path like "wiki/entities/foo.md".
|
||||||
|
func pageTypeFromPath(path string) wiki.PageType {
|
||||||
|
parts := strings.Split(filepath.ToSlash(path), "/")
|
||||||
|
if len(parts) >= 2 {
|
||||||
|
return wiki.PageType(parts[1])
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractTitle returns the value of the "title" key inside the first
// "---"-delimited frontmatter block of content, with surrounding whitespace
// and quote characters removed. Only the first 29 lines are inspected line by
// line. It returns "" when no title is found before the frontmatter closes.
func extractTitle(content string) string {
	opened := false
	for _, raw := range strings.SplitN(content, "\n", 30) {
		switch {
		case strings.TrimSpace(raw) == "---":
			if opened {
				// Closing delimiter reached without seeing a title.
				return ""
			}
			opened = true
		case opened:
			name, value, found := strings.Cut(raw, ":")
			if found && strings.TrimSpace(name) == "title" {
				return strings.Trim(strings.TrimSpace(value), `"'`)
			}
		}
	}
	return ""
}
|
||||||
90
ingestion/internal/pipeline/resolve_test.go
Normal file
90
ingestion/internal/pipeline/resolve_test.go
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
// ingestion/internal/pipeline/resolve_test.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestResolve_NoMatch(t *testing.T) {
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/entities/new-person.md", Content: "---\ntitle: New Person\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeEntity: {
|
||||||
|
{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: []string{"Singer"}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/entities/new-person.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_TitleMatchRedirectsSlug(t *testing.T) {
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/entities/ryan-singer-the-designer.md", Content: "---\ntitle: Ryan Singer\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeEntity: {
|
||||||
|
{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: nil},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/entities/ryan-singer.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_AliasMatchRedirectsSlug(t *testing.T) {
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/entities/singer.md", Content: "---\ntitle: Singer\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeEntity: {
|
||||||
|
{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: []string{"Singer", "R. Singer"}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/entities/ryan-singer.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_NormalizationCaseAndArticles(t *testing.T) {
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/concepts/the-shape-up-method.md", Content: "---\ntitle: The Shape Up Method\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeConcept: {
|
||||||
|
{Slug: "shape-up-method", Title: "Shape Up Method", Aliases: nil},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/concepts/shape-up-method.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_OnlyMatchesSamePageType(t *testing.T) {
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/concepts/ryan-singer.md", Content: "---\ntitle: Ryan Singer\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeEntity: {
|
||||||
|
{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: nil},
|
||||||
|
},
|
||||||
|
wiki.PageTypeConcept: {},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/concepts/ryan-singer.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_EmptyInventory(t *testing.T) {
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/entities/first.md", Content: "---\ntitle: First\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Equal(t, proposed, got)
|
||||||
|
}
|
||||||
@@ -33,7 +33,12 @@ func Query(brainDir, query string, limit int) ([]Result, error) {
|
|||||||
|
|
||||||
var results []Result
|
var results []Result
|
||||||
|
|
||||||
err := filepath.WalkDir(filepath.Join(brainDir, "wiki"), func(path string, d os.DirEntry, err error) error {
|
for _, subdir := range []string{"knowledge", "wiki"} {
|
||||||
|
dir := filepath.Join(brainDir, subdir)
|
||||||
|
if _, statErr := os.Stat(dir); os.IsNotExist(statErr) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
err := filepath.WalkDir(dir, func(path string, d os.DirEntry, err error) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("search: skipping path", "path", path, "err", err)
|
slog.Warn("search: skipping path", "path", path, "err", err)
|
||||||
return nil
|
return nil
|
||||||
@@ -74,6 +79,7 @@ func Query(brainDir, query string, limit int) ([]Result, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
sort.Slice(results, func(i, j int) bool {
|
sort.Slice(results, func(i, j int) bool {
|
||||||
return results[i].Score > results[j].Score
|
return results[i].Score > results[j].Score
|
||||||
|
|||||||
@@ -14,17 +14,15 @@ import (
|
|||||||
|
|
||||||
func TestSearch_ReturnsMatchingPages(t *testing.T) {
|
func TestSearch_ReturnsMatchingPages(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "knowledge"), 0o755))
|
||||||
|
|
||||||
// Write a concept page mentioning "retry"
|
|
||||||
require.NoError(t, os.WriteFile(
|
require.NoError(t, os.WriteFile(
|
||||||
filepath.Join(dir, "wiki", "concepts", "retry-logic.md"),
|
filepath.Join(dir, "knowledge", "retry-logic.md"),
|
||||||
[]byte("---\ntitle: Retry Logic\ndomain: software\n---\n\nRetry logic handles transient failures by re-attempting operations.\n"),
|
[]byte("---\ntitle: Retry Logic\ndomain: software\n---\n\nRetry logic handles transient failures by re-attempting operations.\n"),
|
||||||
0o644,
|
0o644,
|
||||||
))
|
))
|
||||||
// Write an unrelated page
|
|
||||||
require.NoError(t, os.WriteFile(
|
require.NoError(t, os.WriteFile(
|
||||||
filepath.Join(dir, "wiki", "concepts", "database.md"),
|
filepath.Join(dir, "knowledge", "database.md"),
|
||||||
[]byte("---\ntitle: Database\ndomain: software\n---\n\nA database stores structured data.\n"),
|
[]byte("---\ntitle: Database\ndomain: software\n---\n\nA database stores structured data.\n"),
|
||||||
0o644,
|
0o644,
|
||||||
))
|
))
|
||||||
@@ -32,7 +30,7 @@ func TestSearch_ReturnsMatchingPages(t *testing.T) {
|
|||||||
results, err := search.Query(dir, "retry transient", 5)
|
results, err := search.Query(dir, "retry transient", 5)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.Len(t, results, 1)
|
require.Len(t, results, 1)
|
||||||
assert.Equal(t, "wiki/concepts/retry-logic.md", results[0].Path)
|
assert.Equal(t, "knowledge/retry-logic.md", results[0].Path)
|
||||||
assert.Equal(t, "Retry Logic", results[0].Title)
|
assert.Equal(t, "Retry Logic", results[0].Title)
|
||||||
assert.Greater(t, results[0].Score, 0)
|
assert.Greater(t, results[0].Score, 0)
|
||||||
assert.Contains(t, results[0].Excerpt, "Retry")
|
assert.Contains(t, results[0].Excerpt, "Retry")
|
||||||
@@ -40,10 +38,10 @@ func TestSearch_ReturnsMatchingPages(t *testing.T) {
|
|||||||
|
|
||||||
func TestSearch_RespectsLimit(t *testing.T) {
|
func TestSearch_RespectsLimit(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "knowledge"), 0o755))
|
||||||
for i := 0; i < 5; i++ {
|
for i := 0; i < 5; i++ {
|
||||||
require.NoError(t, os.WriteFile(
|
require.NoError(t, os.WriteFile(
|
||||||
filepath.Join(dir, "wiki", "concepts", fmt.Sprintf("page-%d.md", i)),
|
filepath.Join(dir, "knowledge", fmt.Sprintf("page-%d.md", i)),
|
||||||
[]byte(fmt.Sprintf("---\ntitle: Page %d\n---\n\nThis page mentions retry.\n", i)),
|
[]byte(fmt.Sprintf("---\ntitle: Page %d\n---\n\nThis page mentions retry.\n", i)),
|
||||||
0o644,
|
0o644,
|
||||||
))
|
))
|
||||||
|
|||||||
210
ingestion/internal/watcher/watcher.go
Normal file
210
ingestion/internal/watcher/watcher.go
Normal file
@@ -0,0 +1,210 @@
|
|||||||
|
// ingestion/internal/watcher/watcher.go
|
||||||
|
package watcher
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
"unicode"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/extract"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Config holds watcher configuration.
|
||||||
|
type Config struct {
|
||||||
|
BrainDir string
|
||||||
|
Interval time.Duration
|
||||||
|
Pipeline pipeline.Config
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start launches the watcher in a background goroutine.
|
||||||
|
// It returns immediately. The watcher stops when ctx is cancelled.
|
||||||
|
func Start(ctx context.Context, cfg Config) {
|
||||||
|
go func() {
|
||||||
|
ticker := time.NewTicker(cfg.Interval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
date := time.Now().UTC().Format("2006-01-02")
|
||||||
|
errs := processDir(ctx, cfg, date)
|
||||||
|
for _, err := range errs {
|
||||||
|
slog.Error("watcher: error processing file", "error", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// processDir walks brain/raw/, processes each eligible file, returns any errors encountered.
|
||||||
|
func processDir(ctx context.Context, cfg Config, date string) []error {
|
||||||
|
rawDir := filepath.Join(cfg.BrainDir, "raw")
|
||||||
|
|
||||||
|
var errs []error
|
||||||
|
err := filepath.WalkDir(rawDir, func(path string, d os.DirEntry, err error) error {
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip the root itself.
|
||||||
|
if path == rawDir {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip processed/ and failed/ subdirectories entirely.
|
||||||
|
if d.IsDir() {
|
||||||
|
name := d.Name()
|
||||||
|
if name == "processed" || name == "failed" {
|
||||||
|
return filepath.SkipDir
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only process supported extensions.
|
||||||
|
ext := strings.ToLower(filepath.Ext(path))
|
||||||
|
if ext != ".md" && ext != ".txt" && ext != ".pdf" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip files that have already been processed or permanently failed.
|
||||||
|
if _, err := os.Stat(path + ".processed"); err == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(path + ".failed"); err == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := processFile(ctx, cfg, path, date); err != nil {
|
||||||
|
errs = append(errs, fmt.Errorf("process %s: %w", filepath.Base(path), err))
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
errs = append(errs, fmt.Errorf("walk raw dir: %w", err))
|
||||||
|
}
|
||||||
|
return errs
|
||||||
|
}
|
||||||
|
|
||||||
|
// processFile reads a file, calls pipeline.Run, copies it to processed/ or failed/,
|
||||||
|
// and writes a marker file next to the original so the watcher skips it next poll.
|
||||||
|
// The original file is never deleted, keeping Syncthing-connected vaults (e.g. Obsidian) intact.
|
||||||
|
func processFile(ctx context.Context, cfg Config, path, date string) error {
|
||||||
|
filename := filepath.Base(path)
|
||||||
|
source := deriveSource(filename)
|
||||||
|
|
||||||
|
content, err := extract.Text(path)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("extract text: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, runErr := pipeline.Run(ctx, cfg.Pipeline, cfg.BrainDir, content, source, false)
|
||||||
|
if runErr != nil {
|
||||||
|
// Copy to failed/ and leave a .failed marker so we don't retry.
|
||||||
|
failedDir := filepath.Join(cfg.BrainDir, "raw", "failed")
|
||||||
|
if mkErr := os.MkdirAll(failedDir, 0o755); mkErr != nil {
|
||||||
|
return fmt.Errorf("mkdir failed dir: %w", mkErr)
|
||||||
|
}
|
||||||
|
dest := filepath.Join(failedDir, filename)
|
||||||
|
if cpErr := copyFile(path, dest); cpErr != nil {
|
||||||
|
return fmt.Errorf("copy to failed: %w", cpErr)
|
||||||
|
}
|
||||||
|
if mkErr := os.WriteFile(path+".failed", []byte(runErr.Error()), 0o644); mkErr != nil {
|
||||||
|
slog.Error("watcher: failed to write .failed marker", "error", mkErr)
|
||||||
|
}
|
||||||
|
|
||||||
|
slog.Warn("watcher: file failed", "file", filename, "error", runErr)
|
||||||
|
|
||||||
|
if logErr := appendWatcherLog(cfg.BrainDir, filename, runErr, date); logErr != nil {
|
||||||
|
slog.Error("watcher: failed to write log entry", "error", logErr)
|
||||||
|
}
|
||||||
|
// Return nil: quarantine succeeded; error already logged.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy to processed/YYYY-MM-DD/ and leave a .processed marker so we don't re-ingest.
|
||||||
|
processedDir := filepath.Join(cfg.BrainDir, "raw", "processed", date)
|
||||||
|
if err := os.MkdirAll(processedDir, 0o755); err != nil {
|
||||||
|
return fmt.Errorf("mkdir processed dir: %w", err)
|
||||||
|
}
|
||||||
|
dest := filepath.Join(processedDir, filename)
|
||||||
|
if _, err := os.Stat(dest); err == nil {
|
||||||
|
// Archive copy already exists; append timestamp to avoid overwriting.
|
||||||
|
ext := filepath.Ext(filename)
|
||||||
|
base := strings.TrimSuffix(filename, ext)
|
||||||
|
dest = filepath.Join(processedDir, base+"-"+time.Now().UTC().Format("150405")+ext)
|
||||||
|
}
|
||||||
|
if err := copyFile(path, dest); err != nil {
|
||||||
|
return fmt.Errorf("copy to processed: %w", err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(path+".processed", []byte(date), 0o644); err != nil {
|
||||||
|
slog.Error("watcher: failed to write .processed marker", "error", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
slog.Info("watcher: file processed", "file", filename, "source", source)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// copyFile copies the contents of src to dst. dst is created if it does not
// exist and truncated if it does.
//
// When the copy or the final close fails, the incomplete destination file is
// removed so that no truncated copy is left behind, and the error is returned
// wrapped with the step that failed.
func copyFile(src, dst string) error {
	in, err := os.Open(src)
	if err != nil {
		return fmt.Errorf("open src: %w", err)
	}
	defer in.Close() //nolint:errcheck

	out, err := os.Create(dst)
	if err != nil {
		return fmt.Errorf("create dst: %w", err)
	}

	if _, err := io.Copy(out, in); err != nil {
		out.Close()    //nolint:errcheck
		os.Remove(dst) //nolint:errcheck
		return fmt.Errorf("copy: %w", err)
	}
	// Close can surface a deferred write error, so it is checked rather than
	// deferred.
	if err := out.Close(); err != nil {
		os.Remove(dst) //nolint:errcheck
		return fmt.Errorf("close dst: %w", err)
	}
	return nil
}
|
||||||
|
|
||||||
|
// deriveSource converts a file name into a display name: the extension is
// dropped, hyphens become spaces and the first letter of every word is
// upper-cased.
//
//	"shape-up-book.md" → "Shape Up Book"
func deriveSource(filename string) string {
	stem := strings.TrimSuffix(filename, filepath.Ext(filename))

	var title strings.Builder
	for i, word := range strings.Split(stem, "-") {
		if i > 0 {
			title.WriteByte(' ')
		}
		if word == "" {
			// Consecutive hyphens yield empty words; only the space is kept.
			continue
		}
		letters := []rune(word)
		letters[0] = unicode.ToUpper(letters[0])
		title.WriteString(string(letters))
	}
	return title.String()
}
|
||||||
|
|
||||||
|
// appendWatcherLog appends a "watcher error" entry for filename to
// <brainDir>/log.md, creating the file when it does not exist. The entry
// records date, the file name and the text of runErr.
func appendWatcherLog(brainDir, filename string, runErr error, date string) error {
	const entryFormat = "## %s — watcher error\n\n- **File:** %s\n- **Error:** %s\n\n"
	entry := fmt.Sprintf(entryFormat, date, filename, runErr.Error())

	logFile, err := os.OpenFile(
		filepath.Join(brainDir, "log.md"),
		os.O_APPEND|os.O_CREATE|os.O_WRONLY,
		0o644,
	)
	if err != nil {
		return fmt.Errorf("open log: %w", err)
	}

	if _, writeErr := io.WriteString(logFile, entry); writeErr != nil {
		logFile.Close() //nolint:errcheck
		return fmt.Errorf("write log: %w", writeErr)
	}
	return logFile.Close()
}
|
||||||
231
ingestion/internal/watcher/watcher_test.go
Normal file
231
ingestion/internal/watcher/watcher_test.go
Normal file
@@ -0,0 +1,231 @@
|
|||||||
|
// ingestion/internal/watcher/watcher_test.go
|
||||||
|
package watcher
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
||||||
|
)
|
||||||
|
|
||||||
|
// successComplete returns a valid JSON-encoded RawPage array for any call.
|
||||||
|
func successComplete(raw pipeline.RawPage) pipeline.CompleteFunc {
|
||||||
|
return func(ctx context.Context, system, user string) (string, error) {
|
||||||
|
b, err := json.Marshal([]pipeline.RawPage{raw})
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return string(b), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// errorComplete is a CompleteFunc-shaped stub that fails every call with
// "LLM unavailable", standing in for an LLM outage.
func errorComplete(context.Context, string, string) (string, error) {
	err := fmt.Errorf("LLM unavailable")
	return "", err
}
|
||||||
|
|
||||||
|
func setupBrainDir(t *testing.T) string {
|
||||||
|
t.Helper()
|
||||||
|
brainDir := t.TempDir()
|
||||||
|
for _, sub := range []string{"wiki/concepts", "wiki/entities", "wiki/sources", "raw"} {
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(brainDir, sub), 0o755))
|
||||||
|
}
|
||||||
|
return brainDir
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStart_ProcessesFile(t *testing.T) {
|
||||||
|
brainDir := setupBrainDir(t)
|
||||||
|
|
||||||
|
// Place a .md file in raw/.
|
||||||
|
rawFile := filepath.Join(brainDir, "raw", "shape-up-book.md")
|
||||||
|
require.NoError(t, os.WriteFile(rawFile, []byte("Content about Shape Up."), 0o644))
|
||||||
|
|
||||||
|
date := time.Now().UTC().Format("2006-01-02")
|
||||||
|
rawPage := pipeline.RawPage{
|
||||||
|
Title: "Shape Up Book",
|
||||||
|
Type: "source",
|
||||||
|
Subtype: "article",
|
||||||
|
Domain: "product-management",
|
||||||
|
Content: "## Summary\n\nA book about Shape Up.\n",
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg := Config{
|
||||||
|
BrainDir: brainDir,
|
||||||
|
Interval: 50 * time.Millisecond,
|
||||||
|
Pipeline: pipeline.Config{
|
||||||
|
Complete: successComplete(rawPage),
|
||||||
|
ChunkSize: 0,
|
||||||
|
Schema: "# Schema\nThree page types.",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
Start(ctx, cfg)
|
||||||
|
|
||||||
|
// Poll until the file is moved to processed/.
|
||||||
|
processedPath := filepath.Join(brainDir, "raw", "processed", date, "shape-up-book.md")
|
||||||
|
var found bool
|
||||||
|
deadline := time.Now().Add(2 * time.Second)
|
||||||
|
for time.Now().Before(deadline) {
|
||||||
|
if _, err := os.Stat(processedPath); err == nil {
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
time.Sleep(20 * time.Millisecond)
|
||||||
|
}
|
||||||
|
require.True(t, found, "file should be copied to processed/")
|
||||||
|
|
||||||
|
// Original file should still exist (copy, not move — keeps Obsidian vault intact).
|
||||||
|
_, err := os.Stat(rawFile)
|
||||||
|
assert.NoError(t, err, "original file should remain in raw/")
|
||||||
|
|
||||||
|
// A .processed marker should exist next to the original.
|
||||||
|
_, err = os.Stat(rawFile + ".processed")
|
||||||
|
assert.NoError(t, err, ".processed marker should be written")
|
||||||
|
|
||||||
|
// Wiki page should exist.
|
||||||
|
wikiPath := filepath.Join(brainDir, "wiki", "sources", "shape-up-book.md")
|
||||||
|
_, err = os.Stat(wikiPath)
|
||||||
|
assert.NoError(t, err, "wiki page should be written")
|
||||||
|
|
||||||
|
// log.md should contain an ingest record.
|
||||||
|
logContent, err := os.ReadFile(filepath.Join(brainDir, "log.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, string(logContent), "— ingest")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStart_MovesToFailedOnError(t *testing.T) {
|
||||||
|
brainDir := setupBrainDir(t)
|
||||||
|
|
||||||
|
rawFile := filepath.Join(brainDir, "raw", "bad-file.md")
|
||||||
|
require.NoError(t, os.WriteFile(rawFile, []byte("Some content."), 0o644))
|
||||||
|
|
||||||
|
cfg := Config{
|
||||||
|
BrainDir: brainDir,
|
||||||
|
Interval: 50 * time.Millisecond,
|
||||||
|
Pipeline: pipeline.Config{
|
||||||
|
Complete: errorComplete,
|
||||||
|
ChunkSize: 0,
|
||||||
|
Schema: "# Schema\nThree page types.",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
Start(ctx, cfg)
|
||||||
|
|
||||||
|
// Poll until the file is moved to failed/.
|
||||||
|
failedPath := filepath.Join(brainDir, "raw", "failed", "bad-file.md")
|
||||||
|
var found bool
|
||||||
|
deadline := time.Now().Add(2 * time.Second)
|
||||||
|
for time.Now().Before(deadline) {
|
||||||
|
if _, err := os.Stat(failedPath); err == nil {
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
time.Sleep(20 * time.Millisecond)
|
||||||
|
}
|
||||||
|
require.True(t, found, "file should be copied to failed/")
|
||||||
|
|
||||||
|
// Original file should still exist (copy, not move — keeps Obsidian vault intact).
|
||||||
|
_, err := os.Stat(rawFile)
|
||||||
|
assert.NoError(t, err, "original file should remain in raw/")
|
||||||
|
|
||||||
|
// A .failed marker should exist next to the original.
|
||||||
|
_, err = os.Stat(rawFile + ".failed")
|
||||||
|
assert.NoError(t, err, ".failed marker should be written")
|
||||||
|
|
||||||
|
// log.md should contain a watcher error entry.
|
||||||
|
logContent, err := os.ReadFile(filepath.Join(brainDir, "log.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, string(logContent), "— watcher error")
|
||||||
|
assert.Contains(t, string(logContent), "bad-file.md")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeriveSource(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
filename string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{"shape-up-book.md", "Shape Up Book"},
|
||||||
|
{"raft-consensus.txt", "Raft Consensus"},
|
||||||
|
{"my-note.md", "My Note"},
|
||||||
|
{"single.md", "Single"},
|
||||||
|
{"no-extension", "No Extension"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range tests {
|
||||||
|
t.Run(tc.filename, func(t *testing.T) {
|
||||||
|
got := deriveSource(tc.filename)
|
||||||
|
assert.Equal(t, tc.want, got)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProcessDir_SkipsSubdirs(t *testing.T) {
|
||||||
|
brainDir := setupBrainDir(t)
|
||||||
|
|
||||||
|
// Create processed/ and failed/ subdirs with files inside.
|
||||||
|
for _, sub := range []string{"processed/2026-04-22", "failed"} {
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(brainDir, "raw", sub), 0o755))
|
||||||
|
}
|
||||||
|
|
||||||
|
processedFile := filepath.Join(brainDir, "raw", "processed", "2026-04-22", "old-file.md")
|
||||||
|
failedFile := filepath.Join(brainDir, "raw", "failed", "broken-file.md")
|
||||||
|
require.NoError(t, os.WriteFile(processedFile, []byte("old"), 0o644))
|
||||||
|
require.NoError(t, os.WriteFile(failedFile, []byte("broken"), 0o644))
|
||||||
|
|
||||||
|
// Also place a valid file in raw/ root that should be processed.
|
||||||
|
validFile := filepath.Join(brainDir, "raw", "valid.md")
|
||||||
|
require.NoError(t, os.WriteFile(validFile, []byte("valid content"), 0o644))
|
||||||
|
|
||||||
|
date := time.Now().UTC().Format("2006-01-02")
|
||||||
|
|
||||||
|
// Track which sources were passed to Complete.
|
||||||
|
var processedSources []string
|
||||||
|
completeFn := func(ctx context.Context, system, user string) (string, error) {
|
||||||
|
// Record that this was called; return a minimal valid RawPage.
|
||||||
|
raw := pipeline.RawPage{
|
||||||
|
Title: "Valid",
|
||||||
|
Type: "source",
|
||||||
|
Subtype: "article",
|
||||||
|
Content: "## Summary\n\nValid.\n",
|
||||||
|
}
|
||||||
|
b, _ := json.Marshal([]pipeline.RawPage{raw})
|
||||||
|
processedSources = append(processedSources, "called")
|
||||||
|
return string(b), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg := Config{
|
||||||
|
BrainDir: brainDir,
|
||||||
|
Interval: time.Hour, // not used; we call processDir directly
|
||||||
|
Pipeline: pipeline.Config{
|
||||||
|
Complete: completeFn,
|
||||||
|
ChunkSize: 0,
|
||||||
|
Schema: "# Schema\nThree page types.",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
errs := processDir(context.Background(), cfg, date)
|
||||||
|
assert.Empty(t, errs, "no errors expected")
|
||||||
|
|
||||||
|
// Complete should have been called exactly once (for valid.md, not for files in subdirs).
|
||||||
|
assert.Len(t, processedSources, 1, "only the file in raw/ root should be processed")
|
||||||
|
|
||||||
|
// Files in processed/ and failed/ must remain untouched.
|
||||||
|
_, err := os.Stat(processedFile)
|
||||||
|
assert.NoError(t, err, "processed subdir file should be untouched")
|
||||||
|
_, err = os.Stat(failedFile)
|
||||||
|
assert.NoError(t, err, "failed subdir file should be untouched")
|
||||||
|
}
|
||||||
71
ingestion/internal/wiki/index.go
Normal file
71
ingestion/internal/wiki/index.go
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
// ingestion/internal/wiki/index.go
|
||||||
|
package wiki
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RebuildIndex writes brain/wiki/index.md from the current wiki contents.
|
||||||
|
func RebuildIndex(brainDir, date string) error {
|
||||||
|
inv, err := LoadInventory(brainDir)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("load inventory: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
total := len(inv[PageTypeConcept]) + len(inv[PageTypeEntity]) + len(inv[PageTypeSource])
|
||||||
|
var sb strings.Builder
|
||||||
|
fmt.Fprintf(&sb, "# Wiki Index\n\n")
|
||||||
|
fmt.Fprintf(&sb, "_Updated: %s — %d pages (%d concepts, %d entities, %d sources)_\n\n",
|
||||||
|
date, total,
|
||||||
|
len(inv[PageTypeConcept]),
|
||||||
|
len(inv[PageTypeEntity]),
|
||||||
|
len(inv[PageTypeSource]))
|
||||||
|
|
||||||
|
for _, pt := range []PageType{PageTypeConcept, PageTypeEntity, PageTypeSource} {
|
||||||
|
entries := inv[pt]
|
||||||
|
if len(entries) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
label := strings.ToUpper(string(pt)[:1]) + string(pt)[1:]
|
||||||
|
fmt.Fprintf(&sb, "## %s\n\n", label)
|
||||||
|
for _, e := range entries {
|
||||||
|
summary := pageFirstSentence(brainDir, e)
|
||||||
|
if summary != "" {
|
||||||
|
fmt.Fprintf(&sb, "- [[%s|%s]] — %s\n", e.Slug, e.Title, summary)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(&sb, "- [[%s|%s]]\n", e.Slug, e.Title)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sb.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
dest := filepath.Join(brainDir, "wiki", "index.md")
|
||||||
|
return os.WriteFile(dest, []byte(sb.String()), 0o644)
|
||||||
|
}
|
||||||
|
|
||||||
|
func pageFirstSentence(brainDir string, e Entry) string {
|
||||||
|
path := filepath.Join(brainDir, "wiki", string(e.Type), e.Slug+".md")
|
||||||
|
content, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
parts := strings.SplitN(string(content), "---", 3)
|
||||||
|
body := string(content)
|
||||||
|
if len(parts) == 3 {
|
||||||
|
body = parts[2]
|
||||||
|
}
|
||||||
|
for _, line := range strings.Split(body, "\n") {
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
if line == "" || strings.HasPrefix(line, "#") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if len(line) > 100 {
|
||||||
|
return line[:100] + "…"
|
||||||
|
}
|
||||||
|
return line
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
76
ingestion/internal/wiki/index_test.go
Normal file
76
ingestion/internal/wiki/index_test.go
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
// ingestion/internal/wiki/index_test.go
|
||||||
|
package wiki
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func setupWikiDir(t *testing.T) string {
|
||||||
|
t.Helper()
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "entities"), 0o755))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "sources"), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(
|
||||||
|
filepath.Join(dir, "wiki", "concepts", "tdd.md"),
|
||||||
|
[]byte("---\ntitle: TDD\n---\n\n## Definition\n\nTest-driven development is a discipline.\n"),
|
||||||
|
0o644,
|
||||||
|
))
|
||||||
|
return dir
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRebuildIndex(t *testing.T) {
|
||||||
|
dir := setupWikiDir(t)
|
||||||
|
require.NoError(t, RebuildIndex(dir, "2026-04-22"))
|
||||||
|
|
||||||
|
content, err := os.ReadFile(filepath.Join(dir, "wiki", "index.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
s := string(content)
|
||||||
|
assert.Contains(t, s, "# Wiki Index")
|
||||||
|
assert.Contains(t, s, "2026-04-22")
|
||||||
|
assert.Contains(t, s, "[[tdd|TDD]]")
|
||||||
|
assert.Contains(t, s, "## Concepts")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRebuildIndex_EmptyWiki(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "entities"), 0o755))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "sources"), 0o755))
|
||||||
|
|
||||||
|
require.NoError(t, RebuildIndex(dir, "2026-04-22"))
|
||||||
|
content, err := os.ReadFile(filepath.Join(dir, "wiki", "index.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, string(content), "# Wiki Index")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAppendLog(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, AppendLog(dir, "shape-up-book",
|
||||||
|
[]string{"wiki/sources/shape-up.md", "wiki/concepts/betting-table.md"},
|
||||||
|
nil, "2026-04-22"))
|
||||||
|
|
||||||
|
content, err := os.ReadFile(filepath.Join(dir, "log.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
s := string(content)
|
||||||
|
assert.Contains(t, s, "shape-up-book")
|
||||||
|
assert.Contains(t, s, "wiki/sources/shape-up.md")
|
||||||
|
assert.True(t, strings.HasPrefix(s, "## 2026-04-22"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAppendLog_AppendsOnSecondCall(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, AppendLog(dir, "source-a", []string{"wiki/sources/a.md"}, nil, "2026-04-22"))
|
||||||
|
require.NoError(t, AppendLog(dir, "source-b", []string{"wiki/sources/b.md"}, nil, "2026-04-22"))
|
||||||
|
|
||||||
|
content, err := os.ReadFile(filepath.Join(dir, "log.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, string(content), "source-a")
|
||||||
|
assert.Contains(t, string(content), "source-b")
|
||||||
|
}
|
||||||
90
ingestion/internal/wiki/inventory.go
Normal file
90
ingestion/internal/wiki/inventory.go
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
// ingestion/internal/wiki/inventory.go
|
||||||
|
package wiki
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// LoadInventory walks brain/wiki/ and returns all pages grouped by type.
|
||||||
|
// Missing subdirectories are silently skipped.
|
||||||
|
func LoadInventory(brainDir string) (map[PageType][]Entry, error) {
|
||||||
|
result := map[PageType][]Entry{
|
||||||
|
PageTypeConcept: {},
|
||||||
|
PageTypeEntity: {},
|
||||||
|
PageTypeSource: {},
|
||||||
|
}
|
||||||
|
for pt := range result {
|
||||||
|
dir := filepath.Join(brainDir, "wiki", string(pt))
|
||||||
|
entries, err := os.ReadDir(dir)
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("read dir %s: %w", dir, err)
|
||||||
|
}
|
||||||
|
for _, e := range entries {
|
||||||
|
if e.IsDir() || !strings.HasSuffix(e.Name(), ".md") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
slug := strings.TrimSuffix(e.Name(), ".md")
|
||||||
|
path := filepath.Join(dir, e.Name())
|
||||||
|
title, aliases := readFrontmatter(path, slug)
|
||||||
|
result[pt] = append(result[pt], Entry{Slug: slug, Title: title, Aliases: aliases, Type: pt})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// readFrontmatter extracts the title and the aliases from the YAML
// frontmatter of the page at path.
//
// Frontmatter is the block between a "---" line that is the first non-blank
// line of the file and the next "---" line. A "---" further down the file
// (for example a horizontal rule in a page without frontmatter) is not
// treated as frontmatter. Inside the block, "title:" sets the title and
// "aliases:" starts a list whose "- item" lines are collected until the
// first line that is not a list item. Surrounding quotes are stripped from
// the title and from each alias.
//
// The function never fails: if the file cannot be opened, or has no
// frontmatter or no title, the title falls back to fallbackSlug and aliases
// may be nil.
func readFrontmatter(path, fallbackSlug string) (title string, aliases []string) {
	title = fallbackSlug

	f, err := os.Open(path)
	if err != nil {
		return title, nil
	}
	defer f.Close() //nolint:errcheck

	scanner := bufio.NewScanner(f)

	// The opening fence must be the first non-blank line of the file.
	opened := false
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			continue
		}
		opened = line == "---"
		break
	}
	if !opened {
		return title, nil
	}

	inAliases := false
	for scanner.Scan() {
		line := scanner.Text()
		trimmed := strings.TrimSpace(line)
		if trimmed == "---" {
			break // end of frontmatter
		}

		// Collect alias list items ("- value") until the list ends.
		if inAliases {
			if strings.HasPrefix(trimmed, "- ") {
				item := strings.TrimSpace(strings.TrimPrefix(trimmed, "- "))
				aliases = append(aliases, strings.Trim(item, `"'`))
				continue
			}
			inAliases = false // end of alias block; fall through to key parsing
		}

		key, val, ok := strings.Cut(line, ":")
		if !ok {
			continue
		}
		switch strings.TrimSpace(key) {
		case "title":
			title = strings.Trim(strings.TrimSpace(val), `"'`)
		case "aliases":
			inAliases = true
		}
	}
	return title, aliases
}
|
||||||
83
ingestion/internal/wiki/inventory_test.go
Normal file
83
ingestion/internal/wiki/inventory_test.go
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
// ingestion/internal/wiki/inventory_test.go
|
||||||
|
package wiki
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestLoadInventory(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "entities"), 0o755))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "sources"), 0o755))
|
||||||
|
|
||||||
|
require.NoError(t, os.WriteFile(
|
||||||
|
filepath.Join(dir, "wiki", "concepts", "domain-driven-design.md"),
|
||||||
|
[]byte("---\ntitle: Domain Driven Design\n---\n\n## Definition\n\nA thing.\n"),
|
||||||
|
0o644,
|
||||||
|
))
|
||||||
|
require.NoError(t, os.WriteFile(
|
||||||
|
filepath.Join(dir, "wiki", "entities", "ryan-singer.md"),
|
||||||
|
[]byte("---\ntitle: Ryan Singer\n---\n\n## Description\n\nDesigner.\n"),
|
||||||
|
0o644,
|
||||||
|
))
|
||||||
|
|
||||||
|
inv, err := LoadInventory(dir)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
assert.Len(t, inv[PageTypeConcept], 1)
|
||||||
|
assert.Equal(t, "domain-driven-design", inv[PageTypeConcept][0].Slug)
|
||||||
|
assert.Equal(t, "Domain Driven Design", inv[PageTypeConcept][0].Title)
|
||||||
|
|
||||||
|
assert.Len(t, inv[PageTypeEntity], 1)
|
||||||
|
assert.Equal(t, "ryan-singer", inv[PageTypeEntity][0].Slug)
|
||||||
|
|
||||||
|
assert.Empty(t, inv[PageTypeSource])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadInventory_EmptyDirs(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "entities"), 0o755))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "sources"), 0o755))
|
||||||
|
|
||||||
|
inv, err := LoadInventory(dir)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Empty(t, inv[PageTypeConcept])
|
||||||
|
assert.Empty(t, inv[PageTypeEntity])
|
||||||
|
assert.Empty(t, inv[PageTypeSource])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadInventory_MissingDirsOk(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
// No wiki/ subdirs at all
|
||||||
|
inv, err := LoadInventory(dir)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.NotNil(t, inv)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadInventory_ReadsAliases(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "entities"), 0o755))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "sources"), 0o755))
|
||||||
|
|
||||||
|
require.NoError(t, os.WriteFile(
|
||||||
|
filepath.Join(dir, "wiki", "entities", "ryan-singer.md"),
|
||||||
|
[]byte("---\ntitle: Ryan Singer\naliases:\n - Singer\n - R. Singer\n---\n\n## Description\n\nDesigner.\n"),
|
||||||
|
0o644,
|
||||||
|
))
|
||||||
|
|
||||||
|
inv, err := LoadInventory(dir)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.Len(t, inv[PageTypeEntity], 1)
|
||||||
|
e := inv[PageTypeEntity][0]
|
||||||
|
assert.Equal(t, "Ryan Singer", e.Title)
|
||||||
|
assert.Equal(t, []string{"Singer", "R. Singer"}, e.Aliases)
|
||||||
|
}
|
||||||
40
ingestion/internal/wiki/log.go
Normal file
40
ingestion/internal/wiki/log.go
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
// ingestion/internal/wiki/log.go
|
||||||
|
package wiki
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// AppendLog appends one ingestion record to <brainDir>/log.md, creating the
// file when it does not exist.
//
// The record is a "## <date> — ingest" section naming the source, followed
// by a "Pages written" list and a "Warnings" list; either list is omitted
// when empty. Errors opening or writing the log are returned wrapped; the
// result of closing the file is returned as-is.
func AppendLog(brainDir, source string, pages, warnings []string, date string) error {
	var record strings.Builder
	record.WriteString("## " + date + " — ingest\n\n")
	record.WriteString("- **Source:** " + source + "\n")

	// writeList emits a titled bullet list, or nothing for an empty list.
	writeList := func(title string, items []string) {
		if len(items) == 0 {
			return
		}
		record.WriteString(title)
		for _, item := range items {
			record.WriteString(" - " + item + "\n")
		}
	}
	writeList("- **Pages written:**\n", pages)
	writeList("- **Warnings:**\n", warnings)
	record.WriteString("\n")

	logFile, err := os.OpenFile(
		filepath.Join(brainDir, "log.md"),
		os.O_APPEND|os.O_CREATE|os.O_WRONLY,
		0o644,
	)
	if err != nil {
		return fmt.Errorf("open log: %w", err)
	}

	_, writeErr := logFile.WriteString(record.String())
	if writeErr == nil {
		return logFile.Close()
	}
	logFile.Close() //nolint:errcheck
	return fmt.Errorf("write log: %w", writeErr)
}
|
||||||
120
ingestion/internal/wiki/merge.go
Normal file
120
ingestion/internal/wiki/merge.go
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
// ingestion/internal/wiki/merge.go
|
||||||
|
package wiki
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// bulletSections names the section headings whose content is a bullet list.
// Merge combines these by taking the union of unique lines.
var bulletSections = map[string]bool{
	"Chapters":                          true,
	"Concepts Introduced or Reinforced": true,
	"Entities Mentioned":                true,
	"Key Claims":                        true,
	"Related Concepts":                  true,
	"Related Entities":                  true,
	"Sources":                           true,
}
|
||||||
|
|
||||||
|
// appendSections names the section headings whose content accumulates over
// time. Merge combines these by concatenating both versions.
var appendSections = map[string]bool{
	"Open Questions":        true,
	"Open Questions Raised": true,
	"Updates":               true,
	"Evolving Notes":        true,
}
|
||||||
|
|
||||||
|
// section is one "## "-level section of a wiki page: the heading text
// without its "## " prefix, and the lines that follow it.
type section struct {
	heading, content string
}
|
||||||
|
|
||||||
|
// Merge combines two Page values with the same path.
|
||||||
|
// Frontmatter is taken from a. Sections are merged by strategy:
|
||||||
|
// bullet sections union unique lines, append sections concatenate,
|
||||||
|
// all others keep a's version. Sections in b not present in a are appended.
|
||||||
|
func Merge(a, b Page) Page {
|
||||||
|
fmA, secsA := parseSections(a.Content)
|
||||||
|
_, secsB := parseSections(b.Content)
|
||||||
|
|
||||||
|
idx := make(map[string]int, len(secsA))
|
||||||
|
for i, s := range secsA {
|
||||||
|
idx[s.heading] = i
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, sB := range secsB {
|
||||||
|
i, exists := idx[sB.heading]
|
||||||
|
if !exists {
|
||||||
|
idx[sB.heading] = len(secsA)
|
||||||
|
secsA = append(secsA, sB)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
sA := secsA[i]
|
||||||
|
switch {
|
||||||
|
case bulletSections[sB.heading]:
|
||||||
|
secsA[i].content = mergeBullets(sA.content, sB.content)
|
||||||
|
case appendSections[sB.heading]:
|
||||||
|
secsA[i].content = strings.TrimRight(sA.content, "\n") + "\n\n" + strings.TrimLeft(sB.content, "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Page{Path: a.Path, Content: rebuildContent(fmA, secsA)}
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseSections(markdown string) (frontmatter string, sections []section) {
|
||||||
|
lines := strings.Split(markdown, "\n")
|
||||||
|
i := 0
|
||||||
|
|
||||||
|
if i < len(lines) && strings.TrimSpace(lines[i]) == "---" {
|
||||||
|
i++
|
||||||
|
var fmLines []string
|
||||||
|
for i < len(lines) {
|
||||||
|
if strings.TrimSpace(lines[i]) == "---" {
|
||||||
|
i++
|
||||||
|
break
|
||||||
|
}
|
||||||
|
fmLines = append(fmLines, lines[i])
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
frontmatter = fmt.Sprintf("---\n%s\n---\n", strings.Join(fmLines, "\n"))
|
||||||
|
}
|
||||||
|
|
||||||
|
var cur *section
|
||||||
|
for ; i < len(lines); i++ {
|
||||||
|
line := lines[i]
|
||||||
|
if strings.HasPrefix(line, "## ") {
|
||||||
|
if cur != nil {
|
||||||
|
sections = append(sections, *cur)
|
||||||
|
}
|
||||||
|
cur = §ion{heading: strings.TrimPrefix(line, "## ")}
|
||||||
|
} else if cur != nil {
|
||||||
|
cur.content += line + "\n"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if cur != nil {
|
||||||
|
sections = append(sections, *cur)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func rebuildContent(frontmatter string, sections []section) string {
|
||||||
|
var sb strings.Builder
|
||||||
|
sb.WriteString(frontmatter)
|
||||||
|
for _, sec := range sections {
|
||||||
|
fmt.Fprintf(&sb, "\n## %s\n\n%s", sec.heading, sec.content)
|
||||||
|
}
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// mergeBullets returns the lines of a followed by the lines of b, skipping
// blank lines and any line already emitted. Lines are compared with
// surrounding whitespace ignored; the first occurrence is kept as written.
// The result always ends in a newline (it is "\n" when both inputs are
// empty).
//
// a and b are split separately so that, when a does not end in a newline,
// its last line is not fused with the first line of b.
func mergeBullets(a, b string) string {
	seen := make(map[string]struct{})
	var kept []string
	for _, block := range [...]string{a, b} {
		for _, line := range strings.Split(block, "\n") {
			key := strings.TrimSpace(line)
			if key == "" {
				continue
			}
			if _, dup := seen[key]; dup {
				continue
			}
			seen[key] = struct{}{}
			kept = append(kept, line)
		}
	}
	return strings.Join(kept, "\n") + "\n"
}
|
||||||
55
ingestion/internal/wiki/merge_test.go
Normal file
55
ingestion/internal/wiki/merge_test.go
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
// ingestion/internal/wiki/merge_test.go
|
||||||
|
package wiki
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestMerge_BulletSectionsUnion(t *testing.T) {
|
||||||
|
a := Page{Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Related Concepts\n\n- [[bar|Bar]]\n"}
|
||||||
|
b := Page{Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Related Concepts\n\n- [[bar|Bar]]\n- [[baz|Baz]]\n"}
|
||||||
|
|
||||||
|
got := Merge(a, b)
|
||||||
|
assert.Contains(t, got.Content, "[[bar|Bar]]")
|
||||||
|
assert.Contains(t, got.Content, "[[baz|Baz]]")
|
||||||
|
assert.Equal(t, 1, strings.Count(got.Content, "[[bar|Bar]]"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMerge_AppendSections(t *testing.T) {
|
||||||
|
a := Page{Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Evolving Notes\n\nFirst note.\n"}
|
||||||
|
b := Page{Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Evolving Notes\n\nSecond note.\n"}
|
||||||
|
|
||||||
|
got := Merge(a, b)
|
||||||
|
assert.Contains(t, got.Content, "First note.")
|
||||||
|
assert.Contains(t, got.Content, "Second note.")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMerge_KeepFirstForOtherSections(t *testing.T) {
|
||||||
|
a := Page{Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Definition\n\nFirst definition.\n"}
|
||||||
|
b := Page{Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Definition\n\nSecond definition.\n"}
|
||||||
|
|
||||||
|
got := Merge(a, b)
|
||||||
|
assert.Contains(t, got.Content, "First definition.")
|
||||||
|
assert.NotContains(t, got.Content, "Second definition.")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMerge_NewSectionFromB(t *testing.T) {
|
||||||
|
a := Page{Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Definition\n\nA thing.\n"}
|
||||||
|
b := Page{Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Why It Matters\n\nBecause reasons.\n"}
|
||||||
|
|
||||||
|
got := Merge(a, b)
|
||||||
|
assert.Contains(t, got.Content, "A thing.")
|
||||||
|
assert.Contains(t, got.Content, "Because reasons.")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMerge_KeepsFrontmatterFromA(t *testing.T) {
|
||||||
|
a := Page{Path: "p.md", Content: "---\ntitle: A\nlast_updated: 2026-01-01\n---\n\n## Definition\n\nA.\n"}
|
||||||
|
b := Page{Path: "p.md", Content: "---\ntitle: B\nlast_updated: 2026-06-01\n---\n\n## Definition\n\nB.\n"}
|
||||||
|
|
||||||
|
got := Merge(a, b)
|
||||||
|
assert.Contains(t, got.Content, "title: A")
|
||||||
|
assert.NotContains(t, got.Content, "title: B")
|
||||||
|
}
|
||||||
28
ingestion/internal/wiki/slug.go
Normal file
28
ingestion/internal/wiki/slug.go
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
// ingestion/internal/wiki/slug.go
|
||||||
|
package wiki
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"unicode"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Slug turns a title into a kebab-case slug for use as a wiki file name.
//
// The title is lower-cased; letters and digits are kept; each run of spaces,
// hyphens and underscores between kept characters becomes a single hyphen;
// every other character is dropped. The slug never starts or ends with a
// hyphen.
func Slug(title string) string {
	var slug strings.Builder
	gap := false // a separator was seen since the last character written
	for _, r := range strings.ToLower(title) {
		if r == ' ' || r == '-' || r == '_' {
			// Separators before the first kept character are ignored.
			gap = slug.Len() > 0
			continue
		}
		if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
			continue // apostrophes, colons, dots, etc. are dropped
		}
		// The hyphen is written lazily so a trailing separator leaves none.
		if gap {
			slug.WriteRune('-')
			gap = false
		}
		slug.WriteRune(r)
	}
	return slug.String()
}
|
||||||
29
ingestion/internal/wiki/slug_test.go
Normal file
29
ingestion/internal/wiki/slug_test.go
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
// ingestion/internal/wiki/slug_test.go
|
||||||
|
package wiki
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSlug(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
input string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{"Domain Driven Design", "domain-driven-design"},
|
||||||
|
{"It's Complicated", "its-complicated"},
|
||||||
|
{"gRPC", "grpc"},
|
||||||
|
{"GPT-4o", "gpt-4o"},
|
||||||
|
{"Property 1: It's Rough", "property-1-its-rough"},
|
||||||
|
{" leading spaces ", "leading-spaces"},
|
||||||
|
{"multiple spaces", "multiple-spaces"},
|
||||||
|
{"already-kebab", "already-kebab"},
|
||||||
|
}
|
||||||
|
for _, tc := range tests {
|
||||||
|
t.Run(tc.input, func(t *testing.T) {
|
||||||
|
assert.Equal(t, tc.want, Slug(tc.input))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
25
ingestion/internal/wiki/types.go
Normal file
25
ingestion/internal/wiki/types.go
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
// ingestion/internal/wiki/types.go
|
||||||
|
package wiki
|
||||||
|
|
||||||
|
// PageType identifies the wiki subdirectory for a page.
type PageType string

// Known page types. Each value is the subdirectory name as a string.
const (
	PageTypeConcept = PageType("concepts")
	PageTypeEntity  = PageType("entities")
	PageTypeSource  = PageType("sources")
)
|
||||||
|
|
||||||
|
// Page is a wiki page to be written to disk.
type Page struct {
	// Path is relative to brainDir, e.g. "wiki/sources/foo.md".
	Path string
	// Content is the full markdown, including the YAML frontmatter.
	Content string
}
|
||||||
|
|
||||||
|
// Entry is a summary of an existing wiki page used to build the inventory.
|
||||||
|
type Entry struct {
|
||||||
|
Slug string
|
||||||
|
Title string
|
||||||
|
Aliases []string
|
||||||
|
Type PageType
|
||||||
|
}
|
||||||
76
internal/brain/client.go
Normal file
76
internal/brain/client.go
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
// internal/brain/client.go
|
||||||
|
// Package brain provides a lightweight client for querying the ingestion server.
|
||||||
|
// Skill handlers call Query before spawning workers to inject relevant knowledge
|
||||||
|
// from the brain into the task prompt. Errors are suppressed — the brain is
|
||||||
|
// optional context; its absence must never block a skill invocation.
|
||||||
|
package brain
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log/slog"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
type queryResult struct {
|
||||||
|
Path string `json:"path"`
|
||||||
|
Title string `json:"title"`
|
||||||
|
Excerpt string `json:"excerpt"`
|
||||||
|
Score int `json:"score"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Query calls the ingestion server and returns relevant knowledge as a
|
||||||
|
// formatted string ready to prepend to a worker task prompt.
|
||||||
|
// Returns empty string (no error) when baseURL or query is empty,
|
||||||
|
// when the brain is unreachable, or when no results are found.
|
||||||
|
func Query(ctx context.Context, baseURL, query string, limit int) (string, error) {
|
||||||
|
if baseURL == "" || strings.TrimSpace(query) == "" {
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
if limit <= 0 {
|
||||||
|
limit = 3
|
||||||
|
}
|
||||||
|
|
||||||
|
body, _ := json.Marshal(map[string]any{"query": query, "limit": limit})
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, baseURL+"/query", bytes.NewReader(body))
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("brain: build request failed", "err", err)
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
|
||||||
|
resp, err := http.DefaultClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("brain: ingestion server unreachable", "err", err)
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
slog.Warn("brain: ingestion server returned non-OK", "status", resp.StatusCode)
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
out, _ := io.ReadAll(resp.Body)
|
||||||
|
var result struct {
|
||||||
|
Results []queryResult `json:"results"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(out, &result); err != nil || len(result.Results) == 0 {
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var b strings.Builder
|
||||||
|
b.WriteString("## Relevant knowledge\n\n")
|
||||||
|
for _, r := range result.Results {
|
||||||
|
title := r.Title
|
||||||
|
if title == "" {
|
||||||
|
title = r.Path
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&b, "### %s\n%s\n\n", title, r.Excerpt)
|
||||||
|
}
|
||||||
|
return b.String(), nil
|
||||||
|
}
|
||||||
67
internal/brain/client_test.go
Normal file
67
internal/brain/client_test.go
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
package brain_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/supervisor/internal/brain"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestQueryEmptyBaseURL(t *testing.T) {
|
||||||
|
result, err := brain.Query(context.Background(), "", "tdd patterns", 3)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Empty(t, result)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestQueryEmptyQuery(t *testing.T) {
|
||||||
|
result, err := brain.Query(context.Background(), "http://localhost:9999", "", 3)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Empty(t, result)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestQueryFormatsResults(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
assert.Equal(t, "/query", r.URL.Path)
|
||||||
|
var req map[string]any
|
||||||
|
require.NoError(t, json.NewDecoder(r.Body).Decode(&req))
|
||||||
|
assert.Equal(t, "tdd patterns", req["query"])
|
||||||
|
|
||||||
|
json.NewEncoder(w).Encode(map[string]any{ //nolint:errcheck
|
||||||
|
"results": []map[string]any{
|
||||||
|
{"path": "knowledge/tdd.md", "title": "TDD Guide", "excerpt": "Always write tests first.", "score": 5},
|
||||||
|
{"path": "knowledge/go.md", "title": "Go Conventions", "excerpt": "Use table-driven tests.", "score": 3},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
result, err := brain.Query(context.Background(), srv.URL, "tdd patterns", 3)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, result, "## Relevant knowledge")
|
||||||
|
assert.Contains(t, result, "TDD Guide")
|
||||||
|
assert.Contains(t, result, "Always write tests first.")
|
||||||
|
assert.Contains(t, result, "Go Conventions")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestQueryEmptyResults(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
json.NewEncoder(w).Encode(map[string]any{"results": []any{}}) //nolint:errcheck
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
result, err := brain.Query(context.Background(), srv.URL, "obscure query", 3)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Empty(t, result)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestQueryUnavailableServerReturnsEmpty(t *testing.T) {
|
||||||
|
// Brain unavailable — should degrade gracefully, no error
|
||||||
|
result, err := brain.Query(context.Background(), "http://127.0.0.1:19999", "query", 3)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Empty(t, result)
|
||||||
|
}
|
||||||
@@ -9,6 +9,8 @@ type Config struct {
|
|||||||
ConfigDir string // SUPERVISOR_CONFIG_DIR, default ./config/supervisor
|
ConfigDir string // SUPERVISOR_CONFIG_DIR, default ./config/supervisor
|
||||||
ModelsFile string // SUPERVISOR_MODELS_FILE, default <ConfigDir>/../models.yaml
|
ModelsFile string // SUPERVISOR_MODELS_FILE, default <ConfigDir>/../models.yaml
|
||||||
IngestBaseURL string // INGEST_BASE_URL, default http://localhost:3300
|
IngestBaseURL string // INGEST_BASE_URL, default http://localhost:3300
|
||||||
|
IngestSvcURL string // INGEST_SVC_URL — base URL for brain_ingest (/ingest, /ingest-path)
|
||||||
|
KBRetrievalURL string // KB_RETRIEVAL_URL — base URL for brain_search
|
||||||
SessionsDir string // SUPERVISOR_SESSIONS_DIR, default ./brain/sessions
|
SessionsDir string // SUPERVISOR_SESSIONS_DIR, default ./brain/sessions
|
||||||
BrainDir string // SUPERVISOR_BRAIN_DIR, default ./brain
|
BrainDir string // SUPERVISOR_BRAIN_DIR, default ./brain
|
||||||
}
|
}
|
||||||
@@ -22,6 +24,8 @@ func Load() (Config, error) {
|
|||||||
}
|
}
|
||||||
cfg.ModelsFile = envOr("SUPERVISOR_MODELS_FILE", cfg.ConfigDir+"/../models.yaml")
|
cfg.ModelsFile = envOr("SUPERVISOR_MODELS_FILE", cfg.ConfigDir+"/../models.yaml")
|
||||||
cfg.IngestBaseURL = envOr("INGEST_BASE_URL", "http://localhost:3300")
|
cfg.IngestBaseURL = envOr("INGEST_BASE_URL", "http://localhost:3300")
|
||||||
|
cfg.IngestSvcURL = envOr("INGEST_SVC_URL", "")
|
||||||
|
cfg.KBRetrievalURL = envOr("KB_RETRIEVAL_URL", "")
|
||||||
cfg.SessionsDir = envOr("SUPERVISOR_SESSIONS_DIR", "./brain/sessions")
|
cfg.SessionsDir = envOr("SUPERVISOR_SESSIONS_DIR", "./brain/sessions")
|
||||||
cfg.BrainDir = envOr("SUPERVISOR_BRAIN_DIR", "./brain")
|
cfg.BrainDir = envOr("SUPERVISOR_BRAIN_DIR", "./brain")
|
||||||
return cfg, nil
|
return cfg, nil
|
||||||
|
|||||||
@@ -12,8 +12,6 @@ type skillChain struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type modelsFile struct {
|
type modelsFile struct {
|
||||||
Verifier string `yaml:"verifier"`
|
|
||||||
LlamaSwapURL string `yaml:"llama_swap_url"`
|
|
||||||
DefaultChain []string `yaml:"default_chain"`
|
DefaultChain []string `yaml:"default_chain"`
|
||||||
Skills map[string]skillChain `yaml:"skills"`
|
Skills map[string]skillChain `yaml:"skills"`
|
||||||
}
|
}
|
||||||
@@ -34,23 +32,18 @@ func LoadModels(path string) (Models, error) {
|
|||||||
return Models{data: f}, nil
|
return Models{data: f}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verifier returns the model name to use for all local-tier output verification.
|
// ModelFor returns the primary model to use for a skill.
|
||||||
func (m Models) Verifier() string { return m.data.Verifier }
|
// If override is non-empty, it is returned directly.
|
||||||
|
// Falls back to default_chain[0] when the skill has no explicit entry.
|
||||||
// LlamaSwapURL returns the llama-swap base URL for warm-state probing.
|
func (m Models) ModelFor(skill, override string) string {
|
||||||
func (m Models) LlamaSwapURL() string { return m.data.LlamaSwapURL }
|
|
||||||
|
|
||||||
// ChainFor returns the ordered list of model names for a skill.
|
|
||||||
// If override is non-empty, returns a single-entry chain (no escalation).
|
|
||||||
// Falls back to default_chain when the skill has no explicit entry.
|
|
||||||
func (m Models) ChainFor(skill, override string) []string {
|
|
||||||
if override != "" {
|
if override != "" {
|
||||||
return []string{override}
|
return override
|
||||||
}
|
}
|
||||||
if sc, ok := m.data.Skills[skill]; ok && len(sc.Chain) > 0 {
|
if sc, ok := m.data.Skills[skill]; ok && len(sc.Chain) > 0 {
|
||||||
return sc.Chain
|
return sc.Chain[0]
|
||||||
}
|
}
|
||||||
out := make([]string, len(m.data.DefaultChain))
|
if len(m.data.DefaultChain) > 0 {
|
||||||
copy(out, m.data.DefaultChain)
|
return m.data.DefaultChain[0]
|
||||||
return out
|
}
|
||||||
|
return ""
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,9 +11,6 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const testYAML = `
|
const testYAML = `
|
||||||
verifier: claude-sonnet-4-6
|
|
||||||
llama_swap_url: http://koala:8080
|
|
||||||
|
|
||||||
default_chain:
|
default_chain:
|
||||||
- ollama/qwen3-coder-30b-tuned
|
- ollama/qwen3-coder-30b-tuned
|
||||||
- claude-sonnet-4-6
|
- claude-sonnet-4-6
|
||||||
@@ -37,44 +34,20 @@ func writeModels(t *testing.T, content string) string {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestModelsVerifier(t *testing.T) {
|
func TestModelsModelForSkillWithEntry(t *testing.T) {
|
||||||
m, err := config.LoadModels(writeModels(t, testYAML))
|
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.Equal(t, "claude-sonnet-4-6", m.Verifier())
|
assert.Equal(t, "ollama/devstral-tuned", m.ModelFor("review", ""))
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestModelsLlamaSwapURL(t *testing.T) {
|
func TestModelsModelForDefaultFallback(t *testing.T) {
|
||||||
m, err := config.LoadModels(writeModels(t, testYAML))
|
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.Equal(t, "http://koala:8080", m.LlamaSwapURL())
|
assert.Equal(t, "ollama/qwen3-coder-30b-tuned", m.ModelFor("trainer", ""))
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestModelsChainForSkillOverride(t *testing.T) {
|
func TestModelsModelForCallerOverride(t *testing.T) {
|
||||||
m, err := config.LoadModels(writeModels(t, testYAML))
|
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "claude-opus-4-6", m.ModelFor("review", "claude-opus-4-6"))
|
||||||
chain := m.ChainFor("review", "")
|
|
||||||
require.Len(t, chain, 3)
|
|
||||||
assert.Equal(t, "ollama/devstral-tuned", chain[0])
|
|
||||||
assert.Equal(t, "ollama/gemma4", chain[1])
|
|
||||||
assert.Equal(t, "claude-sonnet-4-6", chain[2])
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestModelsChainForDefaultFallback(t *testing.T) {
|
|
||||||
m, err := config.LoadModels(writeModels(t, testYAML))
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
chain := m.ChainFor("trainer", "") // not in skills map
|
|
||||||
require.Len(t, chain, 2)
|
|
||||||
assert.Equal(t, "ollama/qwen3-coder-30b-tuned", chain[0])
|
|
||||||
assert.Equal(t, "claude-sonnet-4-6", chain[1])
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestModelsChainForCallerOverride(t *testing.T) {
|
|
||||||
m, err := config.LoadModels(writeModels(t, testYAML))
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
chain := m.ChainFor("review", "claude-opus-4-6")
|
|
||||||
require.Len(t, chain, 1)
|
|
||||||
assert.Equal(t, "claude-opus-4-6", chain[0])
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,111 +0,0 @@
|
|||||||
package exec
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"context"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"os/exec"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Config holds executor configuration.
type Config struct {
	// ClaudeBinary is the path to the claude binary; defaults to "claude".
	ClaudeBinary string
	// SystemPrompt is the contents of the supervisor CLAUDE.md.
	SystemPrompt string
	// Timeout is the per-invocation timeout; defaults to 120s.
	Timeout time.Duration
	// LiteLLMBaseURL is passed to Claude so it can delegate to Ollama.
	LiteLLMBaseURL string
	// LiteLLMAPIKey is passed to Claude for LiteLLM auth.
	LiteLLMAPIKey string
}
|
|
||||||
|
|
||||||
// Request is the input to a single supervisor invocation.
type Request struct {
	// SkillPrompt is the skill-specific discipline (e.g. tdd.md contents).
	SkillPrompt string
	// TaskPrompt is the specific task (phase, project_root, spec, model).
	TaskPrompt string
	// Model is the resolved model name, passed in the task prompt.
	Model string
	// Tools is the comma-separated list of allowed tools; defaults to
	// "Bash,Read,Write".
	Tools string
}
|
|
||||||
|
|
||||||
// Executor spawns a claude instance and captures its structured JSON output.
|
|
||||||
type Executor struct {
|
|
||||||
cfg Config
|
|
||||||
}
|
|
||||||
|
|
||||||
func New(cfg Config) *Executor {
|
|
||||||
if cfg.ClaudeBinary == "" {
|
|
||||||
cfg.ClaudeBinary = "claude"
|
|
||||||
}
|
|
||||||
if cfg.Timeout == 0 {
|
|
||||||
cfg.Timeout = 120 * time.Second
|
|
||||||
}
|
|
||||||
return &Executor{cfg: cfg}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *Executor) Run(ctx context.Context, req Request) (Result, error) {
|
|
||||||
ctx, cancel := context.WithTimeout(ctx, e.cfg.Timeout)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
tools := req.Tools
|
|
||||||
if tools == "" {
|
|
||||||
tools = "Bash,Read,Write"
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build the full prompt: system rules + skill rules + infra context + task.
|
|
||||||
// LITELLM_API_KEY is injected as a subprocess env var, not in the prompt,
|
|
||||||
// to prevent it appearing in error log output.
|
|
||||||
litellmCtx := fmt.Sprintf("LITELLM_BASE_URL: %s", e.cfg.LiteLLMBaseURL)
|
|
||||||
prompt := strings.Join([]string{
|
|
||||||
e.cfg.SystemPrompt,
|
|
||||||
"---",
|
|
||||||
req.SkillPrompt,
|
|
||||||
"---",
|
|
||||||
litellmCtx,
|
|
||||||
"---",
|
|
||||||
req.TaskPrompt,
|
|
||||||
}, "\n\n")
|
|
||||||
|
|
||||||
args := []string{
|
|
||||||
"--print",
|
|
||||||
"--permission-mode", "bypassPermissions",
|
|
||||||
"--tools", tools,
|
|
||||||
"--json-schema", Schema,
|
|
||||||
"--output-format", "json",
|
|
||||||
}
|
|
||||||
if strings.HasPrefix(req.Model, "claude-") {
|
|
||||||
args = append(args, "--model", req.Model)
|
|
||||||
}
|
|
||||||
args = append(args, prompt)
|
|
||||||
|
|
||||||
cmd := exec.CommandContext(ctx, e.cfg.ClaudeBinary, args...)
|
|
||||||
cmd.Env = append(os.Environ(), "LITELLM_API_KEY="+e.cfg.LiteLLMAPIKey)
|
|
||||||
var stdout, stderr bytes.Buffer
|
|
||||||
cmd.Stdout = &stdout
|
|
||||||
cmd.Stderr = &stderr
|
|
||||||
|
|
||||||
if err := cmd.Run(); err != nil {
|
|
||||||
if ctx.Err() != nil {
|
|
||||||
return Result{}, fmt.Errorf("timeout after %s", e.cfg.Timeout)
|
|
||||||
}
|
|
||||||
return Result{}, fmt.Errorf("claude exited with error: %w — stderr: %s", err, stderr.String())
|
|
||||||
}
|
|
||||||
|
|
||||||
// --output-format json wraps the response in an envelope; structured output
|
|
||||||
// from --json-schema is in the "structured_output" field.
|
|
||||||
var envelope struct {
|
|
||||||
StructuredOutput *Result `json:"structured_output"`
|
|
||||||
IsError bool `json:"is_error"`
|
|
||||||
Result string `json:"result"` // fallback text result for error messages
|
|
||||||
}
|
|
||||||
if err := json.Unmarshal(stdout.Bytes(), &envelope); err != nil {
|
|
||||||
return Result{}, fmt.Errorf("parse envelope JSON: %w — raw: %s — stderr: %s", err, stdout.String(), stderr.String())
|
|
||||||
}
|
|
||||||
if envelope.StructuredOutput == nil {
|
|
||||||
return Result{}, fmt.Errorf("no structured_output in response — result: %s — stderr: %s", envelope.Result, stderr.String())
|
|
||||||
}
|
|
||||||
if err := envelope.StructuredOutput.Validate(); err != nil {
|
|
||||||
return Result{}, fmt.Errorf("invalid result: %w", err)
|
|
||||||
}
|
|
||||||
return *envelope.StructuredOutput, nil
|
|
||||||
}
|
|
||||||
@@ -1,132 +0,0 @@
|
|||||||
package exec_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
)
|
|
||||||
|
|
||||||
// fakeClaudePath writes a shell script that prints fixed output and returns its path.
|
|
||||||
func fakeClaudePath(t *testing.T, output string, exitCode int) string {
|
|
||||||
t.Helper()
|
|
||||||
dir := t.TempDir()
|
|
||||||
script := filepath.Join(dir, "claude")
|
|
||||||
var content string
|
|
||||||
if exitCode != 0 {
|
|
||||||
content = "#!/bin/sh\necho 'error' >&2\nexit 1\n"
|
|
||||||
} else {
|
|
||||||
content = "#!/bin/sh\necho '" + output + "'\n"
|
|
||||||
}
|
|
||||||
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
|
||||||
return script
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestExecutorParsesValidResult(t *testing.T) {
|
|
||||||
// Fake claude emits the --output-format json envelope that the real CLI produces.
|
|
||||||
// The executor extracts the result from the "structured_output" field.
|
|
||||||
envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"red","skill":"tdd","file_path":"/tmp/x_test.go","runner_output":"FAIL","verified":true,"model_used":"self","message":"ok"}}`
|
|
||||||
claude := fakeClaudePath(t, envelope, 0)
|
|
||||||
|
|
||||||
ex := iexec.New(iexec.Config{
|
|
||||||
ClaudeBinary: claude,
|
|
||||||
SystemPrompt: "you are a supervisor",
|
|
||||||
Timeout: 5 * time.Second,
|
|
||||||
})
|
|
||||||
|
|
||||||
result, err := ex.Run(context.Background(), iexec.Request{
|
|
||||||
SkillPrompt: "tdd rules",
|
|
||||||
TaskPrompt: "run red phase",
|
|
||||||
})
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, "pass", result.Status)
|
|
||||||
assert.True(t, result.Verified)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestExecutorReturnsErrorOnNonZeroExit(t *testing.T) {
|
|
||||||
claude := fakeClaudePath(t, "", 1)
|
|
||||||
|
|
||||||
ex := iexec.New(iexec.Config{
|
|
||||||
ClaudeBinary: claude,
|
|
||||||
SystemPrompt: "you are a supervisor",
|
|
||||||
Timeout: 5 * time.Second,
|
|
||||||
})
|
|
||||||
|
|
||||||
_, err := ex.Run(context.Background(), iexec.Request{TaskPrompt: "fail"})
|
|
||||||
assert.Error(t, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestExecutorTimesOut(t *testing.T) {
|
|
||||||
dir := t.TempDir()
|
|
||||||
script := filepath.Join(dir, "claude")
|
|
||||||
require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\nsleep 60\n"), 0755))
|
|
||||||
|
|
||||||
ex := iexec.New(iexec.Config{
|
|
||||||
ClaudeBinary: script,
|
|
||||||
SystemPrompt: "you are a supervisor",
|
|
||||||
Timeout: 100 * time.Millisecond,
|
|
||||||
})
|
|
||||||
|
|
||||||
_, err := ex.Run(context.Background(), iexec.Request{TaskPrompt: "slow"})
|
|
||||||
assert.ErrorContains(t, err, "timeout")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestExecutorPassesModelFlagForCloudModel(t *testing.T) {
|
|
||||||
// The script captures its args to a temp file so we can assert --model was passed.
|
|
||||||
argsFile := filepath.Join(t.TempDir(), "args.txt")
|
|
||||||
envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"review","skill":"review","file_path":"","runner_output":"","verified":true,"model_used":"claude-sonnet-4-6","message":"ok"}}`
|
|
||||||
|
|
||||||
dir := t.TempDir()
|
|
||||||
script := filepath.Join(dir, "claude")
|
|
||||||
content := "#!/bin/sh\necho \"$@\" > " + argsFile + "\necho '" + envelope + "'\n"
|
|
||||||
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
|
||||||
|
|
||||||
ex := iexec.New(iexec.Config{
|
|
||||||
ClaudeBinary: script,
|
|
||||||
SystemPrompt: "sys",
|
|
||||||
Timeout: 5 * time.Second,
|
|
||||||
})
|
|
||||||
|
|
||||||
_, err := ex.Run(context.Background(), iexec.Request{
|
|
||||||
SkillPrompt: "review rules",
|
|
||||||
TaskPrompt: "do review",
|
|
||||||
Model: "claude-sonnet-4-6",
|
|
||||||
})
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
argsData, err := os.ReadFile(argsFile)
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Contains(t, string(argsData), "--model claude-sonnet-4-6")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestExecutorSkipsModelFlagForLocalModel(t *testing.T) {
|
|
||||||
argsFile := filepath.Join(t.TempDir(), "args.txt")
|
|
||||||
envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"review","skill":"review","file_path":"","runner_output":"","verified":true,"model_used":"ollama/devstral","message":"ok"}}`
|
|
||||||
|
|
||||||
dir := t.TempDir()
|
|
||||||
script := filepath.Join(dir, "claude")
|
|
||||||
content := "#!/bin/sh\necho \"$@\" > " + argsFile + "\necho '" + envelope + "'\n"
|
|
||||||
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
|
||||||
|
|
||||||
ex := iexec.New(iexec.Config{
|
|
||||||
ClaudeBinary: script,
|
|
||||||
SystemPrompt: "sys",
|
|
||||||
Timeout: 5 * time.Second,
|
|
||||||
})
|
|
||||||
|
|
||||||
_, err := ex.Run(context.Background(), iexec.Request{
|
|
||||||
SkillPrompt: "review rules",
|
|
||||||
TaskPrompt: "do review",
|
|
||||||
Model: "ollama/devstral",
|
|
||||||
})
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
argsData, err := os.ReadFile(argsFile)
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.NotContains(t, string(argsData), "--model")
|
|
||||||
}
|
|
||||||
@@ -6,12 +6,12 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
// LiteLLMExecutor calls a LiteLLM-compatible /v1/chat/completions endpoint.
|
// LiteLLMExecutor calls a LiteLLM-compatible /v1/chat/completions endpoint
|
||||||
// Local models are expected to return a JSON object matching the Result schema
|
// and returns the raw assistant message text.
|
||||||
// as their response content — no envelope.
|
|
||||||
type LiteLLMExecutor struct {
|
type LiteLLMExecutor struct {
|
||||||
baseURL string
|
baseURL string
|
||||||
apiKey string
|
apiKey string
|
||||||
@@ -21,6 +21,9 @@ type LiteLLMExecutor struct {
|
|||||||
// NewLiteLLM creates a LiteLLMExecutor.
|
// NewLiteLLM creates a LiteLLMExecutor.
|
||||||
// timeout applies to the full HTTP round-trip per call.
|
// timeout applies to the full HTTP round-trip per call.
|
||||||
func NewLiteLLM(baseURL, apiKey string, timeout time.Duration) *LiteLLMExecutor {
|
func NewLiteLLM(baseURL, apiKey string, timeout time.Duration) *LiteLLMExecutor {
|
||||||
|
if timeout == 0 {
|
||||||
|
timeout = 120 * time.Second
|
||||||
|
}
|
||||||
return &LiteLLMExecutor{
|
return &LiteLLMExecutor{
|
||||||
baseURL: baseURL,
|
baseURL: baseURL,
|
||||||
apiKey: apiKey,
|
apiKey: apiKey,
|
||||||
@@ -46,58 +49,79 @@ type litellmResponse struct {
|
|||||||
Choices []litellmChoice `json:"choices"`
|
Choices []litellmChoice `json:"choices"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run dispatches req to the LiteLLM server and parses the Result from the
|
// Complete sends system+user messages to the given model and returns the raw
|
||||||
// assistant message content. Returns an error on network failure, non-200
|
// assistant text along with the round-trip duration in milliseconds.
|
||||||
// status, or unparseable/invalid JSON — all of which the Orchestrator treats
|
func (e *LiteLLMExecutor) Complete(ctx context.Context, model, system, user string) (string, int64, error) {
|
||||||
// as automatic escalation triggers.
|
|
||||||
func (e *LiteLLMExecutor) Run(ctx context.Context, req Request) (Result, error) {
|
|
||||||
body := litellmRequest{
|
body := litellmRequest{
|
||||||
Model: req.Model,
|
Model: model,
|
||||||
Messages: []litellmMessage{
|
Messages: []litellmMessage{
|
||||||
{Role: "system", Content: req.SkillPrompt},
|
{Role: "system", Content: system},
|
||||||
{Role: "user", Content: req.TaskPrompt},
|
{Role: "user", Content: user},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
bodyBytes, err := json.Marshal(body)
|
bodyBytes, err := json.Marshal(body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Result{}, fmt.Errorf("litellm: marshal request: %w", err)
|
return "", 0, fmt.Errorf("litellm: marshal request: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, e.baseURL+"/v1/chat/completions", bytes.NewReader(bodyBytes))
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, e.baseURL+"/v1/chat/completions", bytes.NewReader(bodyBytes))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Result{}, fmt.Errorf("litellm: create request: %w", err)
|
return "", 0, fmt.Errorf("litellm: create request: %w", err)
|
||||||
}
|
}
|
||||||
httpReq.Header.Set("Content-Type", "application/json")
|
httpReq.Header.Set("Content-Type", "application/json")
|
||||||
if e.apiKey != "" {
|
if e.apiKey != "" {
|
||||||
httpReq.Header.Set("Authorization", "Bearer "+e.apiKey)
|
httpReq.Header.Set("Authorization", "Bearer "+e.apiKey)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
t0 := time.Now()
|
||||||
resp, err := e.httpClient.Do(httpReq)
|
resp, err := e.httpClient.Do(httpReq)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Result{}, fmt.Errorf("litellm: request failed: %w", err)
|
return "", 0, fmt.Errorf("litellm: request failed: %w", err)
|
||||||
}
|
}
|
||||||
defer resp.Body.Close() //nolint:errcheck
|
defer resp.Body.Close() //nolint:errcheck
|
||||||
|
durationMs := time.Since(t0).Milliseconds()
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
return Result{}, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
|
return "", 0, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
var chatResp litellmResponse
|
var chatResp litellmResponse
|
||||||
if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil {
|
if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil {
|
||||||
return Result{}, fmt.Errorf("litellm: decode response: %w", err)
|
return "", 0, fmt.Errorf("litellm: decode response: %w", err)
|
||||||
}
|
}
|
||||||
if len(chatResp.Choices) == 0 {
|
if len(chatResp.Choices) == 0 {
|
||||||
return Result{}, fmt.Errorf("litellm: no choices in response")
|
return "", 0, fmt.Errorf("litellm: no choices in response")
|
||||||
}
|
}
|
||||||
|
|
||||||
content := chatResp.Choices[0].Message.Content
|
return stripResultJSON(chatResp.Choices[0].Message.Content), durationMs, nil
|
||||||
var result Result
|
|
||||||
if err := json.Unmarshal([]byte(content), &result); err != nil {
|
|
||||||
return Result{}, fmt.Errorf("litellm: parse result JSON: %w — content: %s", err, content)
|
|
||||||
}
|
}
|
||||||
if err := result.Validate(); err != nil {
|
|
||||||
return Result{}, fmt.Errorf("litellm: invalid result: %w", err)
|
// stripResultJSON removes trailing ```json blocks that match the old structured
|
||||||
|
// result schema (containing "status" and "phase" keys). Some local models produce
|
||||||
|
// correct markdown prose but then append the old JSON format out of habit.
|
||||||
|
func stripResultJSON(text string) string {
|
||||||
|
const fence = "```json"
|
||||||
|
idx := len(text) - 1
|
||||||
|
// Walk backwards past trailing whitespace.
|
||||||
|
for idx >= 0 && (text[idx] == '\n' || text[idx] == '\r' || text[idx] == ' ') {
|
||||||
|
idx--
|
||||||
}
|
}
|
||||||
return result, nil
|
// Must end with closing fence.
|
||||||
|
if idx < 2 || text[idx-2:idx+1] != "```" {
|
||||||
|
return text
|
||||||
|
}
|
||||||
|
// Find the matching opening fence.
|
||||||
|
start := len(text[:idx-2]) - 1
|
||||||
|
for start >= 0 {
|
||||||
|
if start+len(fence) <= len(text) && text[start:start+len(fence)] == fence {
|
||||||
|
block := text[start : idx+1]
|
||||||
|
if strings.Contains(block, `"status"`) && strings.Contains(block, `"phase"`) {
|
||||||
|
return strings.TrimRight(text[:start], " \t\r\n")
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
start--
|
||||||
|
}
|
||||||
|
return text
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,23 +13,11 @@ import (
|
|||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
func validLiteLLMResult() iexec.Result {
|
func chatResponse(t *testing.T, content string) []byte {
|
||||||
return iexec.Result{
|
|
||||||
Status: "pass",
|
|
||||||
Phase: "review",
|
|
||||||
Skill: "review",
|
|
||||||
ModelUsed: "ollama/devstral",
|
|
||||||
Message: "looks good",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func chatResponseFor(t *testing.T, result iexec.Result) []byte {
|
|
||||||
t.Helper()
|
t.Helper()
|
||||||
content, err := json.Marshal(result)
|
|
||||||
require.NoError(t, err)
|
|
||||||
resp := map[string]any{
|
resp := map[string]any{
|
||||||
"choices": []map[string]any{
|
"choices": []map[string]any{
|
||||||
{"message": map[string]any{"role": "assistant", "content": string(content)}},
|
{"message": map[string]any{"role": "assistant", "content": content}},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
data, err := json.Marshal(resp)
|
data, err := json.Marshal(resp)
|
||||||
@@ -37,25 +25,21 @@ func chatResponseFor(t *testing.T, result iexec.Result) []byte {
|
|||||||
return data
|
return data
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestLiteLLMParsesValidResult(t *testing.T) {
|
func TestLiteLLMReturnsText(t *testing.T) {
|
||||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
assert.Equal(t, "/v1/chat/completions", r.URL.Path)
|
assert.Equal(t, "/v1/chat/completions", r.URL.Path)
|
||||||
assert.Equal(t, "application/json", r.Header.Get("Content-Type"))
|
assert.Equal(t, "application/json", r.Header.Get("Content-Type"))
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
_, _ = w.Write(chatResponseFor(t, validLiteLLMResult()))
|
_, _ = w.Write(chatResponse(t, "here is my analysis"))
|
||||||
}))
|
}))
|
||||||
defer srv.Close()
|
defer srv.Close()
|
||||||
|
|
||||||
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||||
result, err := ex.Run(context.Background(), iexec.Request{
|
text, dur, err := ex.Complete(context.Background(), "ollama/devstral", "system prompt", "user prompt")
|
||||||
SkillPrompt: "review rules",
|
|
||||||
TaskPrompt: "review the code",
|
|
||||||
Model: "ollama/devstral",
|
|
||||||
})
|
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.Equal(t, "pass", result.Status)
|
assert.Equal(t, "here is my analysis", text)
|
||||||
assert.Equal(t, "review", result.Skill)
|
assert.GreaterOrEqual(t, dur, int64(0))
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestLiteLLMSendsAuthHeader(t *testing.T) {
|
func TestLiteLLMSendsAuthHeader(t *testing.T) {
|
||||||
@@ -63,12 +47,12 @@ func TestLiteLLMSendsAuthHeader(t *testing.T) {
|
|||||||
assert.Equal(t, "Bearer secret", r.Header.Get("Authorization"))
|
assert.Equal(t, "Bearer secret", r.Header.Get("Authorization"))
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
_, _ = w.Write(chatResponseFor(t, validLiteLLMResult()))
|
_, _ = w.Write(chatResponse(t, "ok"))
|
||||||
}))
|
}))
|
||||||
defer srv.Close()
|
defer srv.Close()
|
||||||
|
|
||||||
ex := iexec.NewLiteLLM(srv.URL, "secret", 5*time.Second)
|
ex := iexec.NewLiteLLM(srv.URL, "secret", 5*time.Second)
|
||||||
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t", SkillPrompt: "s"})
|
_, _, err := ex.Complete(context.Background(), "model", "sys", "user")
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -79,34 +63,62 @@ func TestLiteLLMErrorOnNonOKStatus(t *testing.T) {
|
|||||||
defer srv.Close()
|
defer srv.Close()
|
||||||
|
|
||||||
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||||
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"})
|
_, _, err := ex.Complete(context.Background(), "model", "sys", "user")
|
||||||
assert.ErrorContains(t, err, "503")
|
assert.ErrorContains(t, err, "503")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestLiteLLMErrorOnUnparsableJSON(t *testing.T) {
|
func TestLiteLLMErrorOnEmptyChoices(t *testing.T) {
|
||||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
resp := map[string]any{
|
_, _ = w.Write([]byte(`{"choices":[]}`))
|
||||||
"choices": []map[string]any{
|
|
||||||
{"message": map[string]any{"role": "assistant", "content": "not json at all"}},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
data, _ := json.Marshal(resp)
|
|
||||||
_, _ = w.Write(data)
|
|
||||||
}))
|
}))
|
||||||
defer srv.Close()
|
defer srv.Close()
|
||||||
|
|
||||||
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||||
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"})
|
_, _, err := ex.Complete(context.Background(), "model", "sys", "user")
|
||||||
assert.Error(t, err)
|
assert.ErrorContains(t, err, "no choices")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLiteLLMStripsTrailingResultJSON(t *testing.T) {
|
||||||
|
content := "## Hypotheses\n\n**H1 (high):** nil map access.\n\n```json\n{\n \"status\": \"pass\",\n \"phase\": \"debug\",\n \"skill\": \"debug\"\n}\n```"
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write(chatResponse(t, content))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||||
|
text, _, err := ex.Complete(context.Background(), "model", "sys", "user")
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, text, "nil map access")
|
||||||
|
assert.NotContains(t, text, `"status"`)
|
||||||
|
assert.NotContains(t, text, "```json")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLiteLLMKeepsNonResultJSONFence(t *testing.T) {
|
||||||
|
// A json block that is part of the actual answer (no status/phase) should be kept.
|
||||||
|
content := "Use this config:\n\n```json\n{\"model\": \"koala/phi4\"}\n```"
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write(chatResponse(t, content))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||||
|
text, _, err := ex.Complete(context.Background(), "model", "sys", "user")
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, text, `"model"`)
|
||||||
|
assert.Contains(t, text, "```json")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestLiteLLMRespectsContextCancellation(t *testing.T) {
|
func TestLiteLLMRespectsContextCancellation(t *testing.T) {
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
cancel() // Cancel immediately
|
cancel()
|
||||||
|
|
||||||
ex := iexec.NewLiteLLM("http://invalid.example.com", "", 1*time.Second)
|
ex := iexec.NewLiteLLM("http://invalid.example.com", "", 1*time.Second)
|
||||||
_, err := ex.Run(ctx, iexec.Request{Model: "x", TaskPrompt: "t"})
|
_, _, err := ex.Complete(ctx, "model", "sys", "user")
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,197 +0,0 @@
|
|||||||
package exec
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"net/http"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
)
|
|
||||||
|
|
||||||
// ChainEntry is one tier in an escalation chain.
|
|
||||||
type ChainEntry struct {
|
|
||||||
Model string // e.g. "ollama/phi4", "claude-sonnet-4-6"
|
|
||||||
Tier string // "local" | "subagent" | "managed"
|
|
||||||
IsCloud bool // true for claude-* models; skips verifier call
|
|
||||||
}
|
|
||||||
|
|
||||||
// EntryFor builds a ChainEntry from a model name string.
|
|
||||||
func EntryFor(model string) ChainEntry {
|
|
||||||
cloud := strings.HasPrefix(model, "claude-")
|
|
||||||
tier := "local"
|
|
||||||
if cloud {
|
|
||||||
tier = "subagent"
|
|
||||||
}
|
|
||||||
return ChainEntry{Model: model, Tier: tier, IsCloud: cloud}
|
|
||||||
}
|
|
||||||
|
|
||||||
// AttemptRecord captures the outcome of one tier attempt for session logging.
|
|
||||||
type AttemptRecord struct {
|
|
||||||
Model string
|
|
||||||
Tier string
|
|
||||||
DurationMs int64
|
|
||||||
WarmStart bool
|
|
||||||
Verdict string // "accept" | "escalate" | "error"
|
|
||||||
Feedback string
|
|
||||||
}
|
|
||||||
|
|
||||||
// VerifierFn is the interface the orchestrator uses to verify local output.
|
|
||||||
type VerifierFn interface {
|
|
||||||
Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ExecutorRunFn is the signature of Executor.Run and LiteLLMExecutor.Run.
|
|
||||||
type ExecutorRunFn func(ctx context.Context, req Request) (Result, error)
|
|
||||||
|
|
||||||
// Orchestrator walks an escalation chain, delegating generation and verification.
|
|
||||||
// It implements the ExecutorFn shape expected by skill handlers.
|
|
||||||
type Orchestrator struct {
|
|
||||||
chain []ChainEntry
|
|
||||||
localRun ExecutorRunFn // for local (non-cloud) tiers; may be nil
|
|
||||||
cloudRun ExecutorRunFn // for cloud tiers; may be nil
|
|
||||||
verifier VerifierFn
|
|
||||||
llamaSwapURL string
|
|
||||||
attempts *[]AttemptRecord
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewOrchestrator creates an Orchestrator.
|
|
||||||
// attempts is a pointer to a slice that will be appended to on each tier attempt.
|
|
||||||
// Pass nil for localRun or cloudRun if no tiers of that type exist in the chain.
|
|
||||||
func NewOrchestrator(
|
|
||||||
chain []ChainEntry,
|
|
||||||
localRun ExecutorRunFn,
|
|
||||||
cloudRun ExecutorRunFn,
|
|
||||||
verifier VerifierFn,
|
|
||||||
llamaSwapURL string,
|
|
||||||
attempts *[]AttemptRecord,
|
|
||||||
) *Orchestrator {
|
|
||||||
return &Orchestrator{
|
|
||||||
chain: chain,
|
|
||||||
localRun: localRun,
|
|
||||||
cloudRun: cloudRun,
|
|
||||||
verifier: verifier,
|
|
||||||
llamaSwapURL: llamaSwapURL,
|
|
||||||
attempts: attempts,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Run walks the escalation chain and returns the first accepted result.
|
|
||||||
// Satisfies the ExecutorFn signature: func(context.Context, Request) (Result, error).
|
|
||||||
func (o *Orchestrator) Run(ctx context.Context, req Request) (Result, error) {
|
|
||||||
taskPrompt := req.TaskPrompt
|
|
||||||
|
|
||||||
for _, entry := range o.chain {
|
|
||||||
warm := o.probeWarm(entry.Model)
|
|
||||||
start := time.Now()
|
|
||||||
|
|
||||||
tierReq := req
|
|
||||||
tierReq.Model = entry.Model
|
|
||||||
tierReq.TaskPrompt = taskPrompt
|
|
||||||
|
|
||||||
if entry.IsCloud {
|
|
||||||
result, genErr := o.cloudRun(ctx, tierReq)
|
|
||||||
dur := time.Since(start).Milliseconds()
|
|
||||||
verdict := "accept"
|
|
||||||
if genErr != nil {
|
|
||||||
verdict = "error"
|
|
||||||
}
|
|
||||||
o.appendAttempt(AttemptRecord{
|
|
||||||
Model: entry.Model,
|
|
||||||
Tier: entry.Tier,
|
|
||||||
DurationMs: dur,
|
|
||||||
WarmStart: warm,
|
|
||||||
Verdict: verdict,
|
|
||||||
})
|
|
||||||
if genErr == nil {
|
|
||||||
return result, nil
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Local tier.
|
|
||||||
result, genErr := o.localRun(ctx, tierReq)
|
|
||||||
dur := time.Since(start).Milliseconds()
|
|
||||||
|
|
||||||
if genErr != nil {
|
|
||||||
o.appendAttempt(AttemptRecord{
|
|
||||||
Model: entry.Model,
|
|
||||||
Tier: entry.Tier,
|
|
||||||
DurationMs: dur,
|
|
||||||
WarmStart: warm,
|
|
||||||
Verdict: "error",
|
|
||||||
Feedback: genErr.Error(),
|
|
||||||
})
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
verdict, verErr := o.verifier.Verify(ctx, req.SkillPrompt, taskPrompt, result)
|
|
||||||
if verErr != nil {
|
|
||||||
// Treat verifier failure as escalate (safe default).
|
|
||||||
o.appendAttempt(AttemptRecord{
|
|
||||||
Model: entry.Model,
|
|
||||||
Tier: entry.Tier,
|
|
||||||
DurationMs: dur,
|
|
||||||
WarmStart: warm,
|
|
||||||
Verdict: "escalate",
|
|
||||||
Feedback: "verifier error: " + verErr.Error(),
|
|
||||||
})
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if verdict.Accept {
|
|
||||||
o.appendAttempt(AttemptRecord{
|
|
||||||
Model: entry.Model,
|
|
||||||
Tier: entry.Tier,
|
|
||||||
DurationMs: dur,
|
|
||||||
WarmStart: warm,
|
|
||||||
Verdict: "accept",
|
|
||||||
})
|
|
||||||
return result, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
o.appendAttempt(AttemptRecord{
|
|
||||||
Model: entry.Model,
|
|
||||||
Tier: entry.Tier,
|
|
||||||
DurationMs: dur,
|
|
||||||
WarmStart: warm,
|
|
||||||
Verdict: "escalate",
|
|
||||||
Feedback: verdict.Feedback,
|
|
||||||
})
|
|
||||||
// Inject verifier feedback into the next tier's task prompt.
|
|
||||||
taskPrompt = taskPrompt + "\n\nPrior attempt feedback: " + verdict.Feedback
|
|
||||||
}
|
|
||||||
|
|
||||||
return Result{}, fmt.Errorf("all tiers exhausted after %d attempt(s)", len(o.chain))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (o *Orchestrator) appendAttempt(rec AttemptRecord) {
|
|
||||||
if o.attempts != nil {
|
|
||||||
*o.attempts = append(*o.attempts, rec)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// probeWarm checks whether the model is currently loaded in llama-swap.
|
|
||||||
// Returns false on any error or if llamaSwapURL is empty.
|
|
||||||
func (o *Orchestrator) probeWarm(model string) bool {
|
|
||||||
if o.llamaSwapURL == "" {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, o.llamaSwapURL+"/v1/models", nil)
|
|
||||||
if err != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
resp, err := http.DefaultClient.Do(req)
|
|
||||||
if err != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
defer resp.Body.Close() //nolint:errcheck
|
|
||||||
body, err := io.ReadAll(resp.Body)
|
|
||||||
if err != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return strings.Contains(string(body), model)
|
|
||||||
}
|
|
||||||
@@ -1,151 +0,0 @@
|
|||||||
package exec_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"errors"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
)
|
|
||||||
|
|
||||||
// stubRunFn returns preset results sequentially.
|
|
||||||
type stubRunFn struct {
|
|
||||||
calls []stubCall
|
|
||||||
callIdx int
|
|
||||||
}
|
|
||||||
|
|
||||||
type stubCall struct {
|
|
||||||
result iexec.Result
|
|
||||||
err error
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *stubRunFn) Run(_ context.Context, _ iexec.Request) (iexec.Result, error) {
|
|
||||||
if s.callIdx >= len(s.calls) {
|
|
||||||
return iexec.Result{}, errors.New("unexpected call")
|
|
||||||
}
|
|
||||||
c := s.calls[s.callIdx]
|
|
||||||
s.callIdx++
|
|
||||||
return c.result, c.err
|
|
||||||
}
|
|
||||||
|
|
||||||
// stubVerifier returns preset verdicts sequentially.
|
|
||||||
type stubVerifier struct {
|
|
||||||
verdicts []iexec.Verdict
|
|
||||||
idx int
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *stubVerifier) Verify(_ context.Context, _, _ string, _ iexec.Result) (iexec.Verdict, error) {
|
|
||||||
if s.idx >= len(s.verdicts) {
|
|
||||||
return iexec.Verdict{}, errors.New("unexpected verify call")
|
|
||||||
}
|
|
||||||
v := s.verdicts[s.idx]
|
|
||||||
s.idx++
|
|
||||||
return v, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func okResult(skill string) iexec.Result {
|
|
||||||
return iexec.Result{Status: "pass", Phase: "review", Skill: skill, Message: "ok", ModelUsed: "m"}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestOrchestratorSingleLocalAccept(t *testing.T) {
|
|
||||||
local := &stubRunFn{calls: []stubCall{{result: okResult("review")}}}
|
|
||||||
verifier := &stubVerifier{verdicts: []iexec.Verdict{{Accept: true}}}
|
|
||||||
|
|
||||||
var attempts []iexec.AttemptRecord
|
|
||||||
orch := iexec.NewOrchestrator(
|
|
||||||
[]iexec.ChainEntry{{Model: "ollama/devstral", Tier: "local", IsCloud: false}},
|
|
||||||
local.Run, nil, verifier, "", &attempts,
|
|
||||||
)
|
|
||||||
|
|
||||||
result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, "pass", result.Status)
|
|
||||||
require.Len(t, attempts, 1)
|
|
||||||
assert.Equal(t, "local", attempts[0].Tier)
|
|
||||||
assert.Equal(t, "accept", attempts[0].Verdict)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestOrchestratorEscalatesOnVerifierReject(t *testing.T) {
|
|
||||||
local := &stubRunFn{calls: []stubCall{
|
|
||||||
{result: iexec.Result{Status: "fail", Phase: "review", Skill: "review", Message: "weak"}},
|
|
||||||
{result: okResult("review")},
|
|
||||||
}}
|
|
||||||
verifier := &stubVerifier{verdicts: []iexec.Verdict{
|
|
||||||
{Accept: false, Feedback: "missing line refs"},
|
|
||||||
{Accept: true},
|
|
||||||
}}
|
|
||||||
|
|
||||||
var attempts []iexec.AttemptRecord
|
|
||||||
orch := iexec.NewOrchestrator(
|
|
||||||
[]iexec.ChainEntry{
|
|
||||||
{Model: "ollama/devstral", Tier: "local", IsCloud: false},
|
|
||||||
{Model: "ollama/gemma4", Tier: "local", IsCloud: false},
|
|
||||||
},
|
|
||||||
local.Run, nil, verifier, "", &attempts,
|
|
||||||
)
|
|
||||||
|
|
||||||
result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, "pass", result.Status)
|
|
||||||
require.Len(t, attempts, 2)
|
|
||||||
assert.Equal(t, "escalate", attempts[0].Verdict)
|
|
||||||
assert.Equal(t, "missing line refs", attempts[0].Feedback)
|
|
||||||
assert.Equal(t, "accept", attempts[1].Verdict)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestOrchestratorEscalatesOnLocalError(t *testing.T) {
|
|
||||||
local := &stubRunFn{calls: []stubCall{
|
|
||||||
{err: errors.New("network failure")},
|
|
||||||
{result: okResult("review")},
|
|
||||||
}}
|
|
||||||
verifier := &stubVerifier{verdicts: []iexec.Verdict{{Accept: true}}}
|
|
||||||
|
|
||||||
var attempts []iexec.AttemptRecord
|
|
||||||
orch := iexec.NewOrchestrator(
|
|
||||||
[]iexec.ChainEntry{
|
|
||||||
{Model: "ollama/devstral", Tier: "local", IsCloud: false},
|
|
||||||
{Model: "ollama/gemma4", Tier: "local", IsCloud: false},
|
|
||||||
},
|
|
||||||
local.Run, nil, verifier, "", &attempts,
|
|
||||||
)
|
|
||||||
|
|
||||||
_, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
|
||||||
require.NoError(t, err)
|
|
||||||
require.Len(t, attempts, 2)
|
|
||||||
assert.Equal(t, "error", attempts[0].Verdict)
|
|
||||||
assert.Equal(t, "accept", attempts[1].Verdict)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestOrchestratorCloudTierSelfCertifies(t *testing.T) {
|
|
||||||
cloud := &stubRunFn{calls: []stubCall{{result: okResult("review")}}}
|
|
||||||
verifier := &stubVerifier{} // no verdicts — must not be called
|
|
||||||
|
|
||||||
var attempts []iexec.AttemptRecord
|
|
||||||
orch := iexec.NewOrchestrator(
|
|
||||||
[]iexec.ChainEntry{{Model: "claude-sonnet-4-6", Tier: "subagent", IsCloud: true}},
|
|
||||||
nil, cloud.Run, verifier, "", &attempts,
|
|
||||||
)
|
|
||||||
|
|
||||||
result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, "pass", result.Status)
|
|
||||||
require.Len(t, attempts, 1)
|
|
||||||
assert.Equal(t, "subagent", attempts[0].Tier)
|
|
||||||
assert.Equal(t, "accept", attempts[0].Verdict)
|
|
||||||
assert.Equal(t, 0, verifier.idx) // verifier never called
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestOrchestratorAllTiersExhausted(t *testing.T) {
|
|
||||||
local := &stubRunFn{calls: []stubCall{{err: errors.New("unavailable")}}}
|
|
||||||
|
|
||||||
var attempts []iexec.AttemptRecord
|
|
||||||
orch := iexec.NewOrchestrator(
|
|
||||||
[]iexec.ChainEntry{{Model: "ollama/devstral", Tier: "local", IsCloud: false}},
|
|
||||||
local.Run, nil, &stubVerifier{}, "", &attempts,
|
|
||||||
)
|
|
||||||
|
|
||||||
_, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
|
||||||
assert.ErrorContains(t, err, "all tiers exhausted")
|
|
||||||
}
|
|
||||||
@@ -1,65 +0,0 @@
|
|||||||
package exec
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"strings"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Result is the structured JSON output from every supervisor invocation.
|
|
||||||
// The JSON schema constant is passed to claude via --json-schema so Claude
|
|
||||||
// validates its own output before returning.
|
|
||||||
type Result struct {
|
|
||||||
Status string `json:"status"` // pass | fail | error
|
|
||||||
Phase string `json:"phase"` // red | green | refactor | retrospective | review | debug | spec | trainer
|
|
||||||
Skill string `json:"skill"` // tdd | review | ...
|
|
||||||
FilePath string `json:"file_path"` // absolute path to generated file
|
|
||||||
RunnerOutput string `json:"runner_output"` // raw stdout+stderr from test runner
|
|
||||||
Verified bool `json:"verified"` // based on exit code, never self-report
|
|
||||||
ModelUsed string `json:"model_used"` // model name or "self"
|
|
||||||
Message string `json:"message"` // one sentence summary
|
|
||||||
}
|
|
||||||
|
|
||||||
var validStatuses = map[string]bool{"pass": true, "fail": true, "error": true}
|
|
||||||
var validPhases = map[string]bool{
|
|
||||||
"red": true,
|
|
||||||
"green": true,
|
|
||||||
"refactor": true,
|
|
||||||
"retrospective": true,
|
|
||||||
"review": true,
|
|
||||||
"debug": true,
|
|
||||||
"spec": true,
|
|
||||||
"trainer": true,
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r Result) Validate() error {
|
|
||||||
var errs []string
|
|
||||||
if !validStatuses[r.Status] {
|
|
||||||
errs = append(errs, "status must be pass|fail|error, got: "+r.Status)
|
|
||||||
}
|
|
||||||
if !validPhases[r.Phase] {
|
|
||||||
errs = append(errs, "phase must be one of red|green|refactor|retrospective|review|debug|spec|trainer, got: "+r.Phase)
|
|
||||||
}
|
|
||||||
if r.Skill == "" {
|
|
||||||
errs = append(errs, "skill is required")
|
|
||||||
}
|
|
||||||
if len(errs) > 0 {
|
|
||||||
return errors.New(strings.Join(errs, "; "))
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Schema is passed to claude --json-schema to enforce structured output.
|
|
||||||
const Schema = `{
|
|
||||||
"type": "object",
|
|
||||||
"required": ["status","phase","skill","file_path","runner_output","verified","model_used","message"],
|
|
||||||
"properties": {
|
|
||||||
"status": {"type": "string", "enum": ["pass","fail","error"]},
|
|
||||||
"phase": {"type": "string"},
|
|
||||||
"skill": {"type": "string"},
|
|
||||||
"file_path": {"type": "string"},
|
|
||||||
"runner_output": {"type": "string"},
|
|
||||||
"verified": {"type": "boolean"},
|
|
||||||
"model_used": {"type": "string"},
|
|
||||||
"message": {"type": "string"}
|
|
||||||
}
|
|
||||||
}`
|
|
||||||
@@ -1,79 +0,0 @@
|
|||||||
package exec_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/mathiasbq/supervisor/internal/exec"
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestResultParsesValidJSON(t *testing.T) {
|
|
||||||
raw := `{
|
|
||||||
"status": "pass",
|
|
||||||
"phase": "red",
|
|
||||||
"skill": "tdd",
|
|
||||||
"file_path": "/tmp/foo_test.go",
|
|
||||||
"runner_output": "--- FAIL: TestFoo",
|
|
||||||
"verified": true,
|
|
||||||
"model_used": "self",
|
|
||||||
"message": "test fails as expected"
|
|
||||||
}`
|
|
||||||
var r exec.Result
|
|
||||||
require.NoError(t, json.Unmarshal([]byte(raw), &r))
|
|
||||||
assert.Equal(t, "pass", r.Status)
|
|
||||||
assert.Equal(t, "red", r.Phase)
|
|
||||||
assert.True(t, r.Verified)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestResultValidation(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
result exec.Result
|
|
||||||
wantErr bool
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
name: "valid pass result",
|
|
||||||
result: exec.Result{
|
|
||||||
Status: "pass", Phase: "red", Skill: "tdd",
|
|
||||||
FilePath: "/tmp/x_test.go", RunnerOutput: "FAIL",
|
|
||||||
Verified: true, ModelUsed: "self", Message: "ok",
|
|
||||||
},
|
|
||||||
wantErr: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "empty status",
|
|
||||||
result: exec.Result{Phase: "red", Skill: "tdd"},
|
|
||||||
wantErr: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "invalid status",
|
|
||||||
result: exec.Result{Status: "unknown", Phase: "red", Skill: "tdd"},
|
|
||||||
wantErr: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "invalid phase",
|
|
||||||
result: exec.Result{Status: "pass", Phase: "bad", Skill: "tdd"},
|
|
||||||
wantErr: true,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for _, tt := range tests {
|
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
|
||||||
err := tt.result.Validate()
|
|
||||||
if tt.wantErr {
|
|
||||||
assert.Error(t, err)
|
|
||||||
} else {
|
|
||||||
assert.NoError(t, err)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestValidateAcceptsAllPhases(t *testing.T) {
|
|
||||||
phases := []string{"red", "green", "refactor", "retrospective", "review", "debug", "spec", "trainer"}
|
|
||||||
for _, phase := range phases {
|
|
||||||
r := exec.Result{Status: "pass", Phase: phase, Skill: "test", ModelUsed: "self", Message: "ok"}
|
|
||||||
assert.NoError(t, r.Validate(), "phase %q should be valid", phase)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,99 +0,0 @@
|
|||||||
package exec
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"context"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"os/exec"
|
|
||||||
"time"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Verdict is the output of a Claude verification call.
|
|
||||||
type Verdict struct {
|
|
||||||
Accept bool `json:"accept"`
|
|
||||||
Feedback string `json:"feedback"` // empty when Accept is true
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verifier runs a focused Claude call to judge local model output.
|
|
||||||
type Verifier struct {
|
|
||||||
claudeBinary string
|
|
||||||
model string
|
|
||||||
timeout time.Duration
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewVerifier creates a Verifier that calls claude with the given binary path and model.
|
|
||||||
// Empty claudeBinary defaults to "claude". Zero timeout defaults to 30s.
|
|
||||||
func NewVerifier(claudeBinary, model string, timeout time.Duration) *Verifier {
|
|
||||||
if claudeBinary == "" {
|
|
||||||
claudeBinary = "claude"
|
|
||||||
}
|
|
||||||
if timeout == 0 {
|
|
||||||
timeout = 30 * time.Second
|
|
||||||
}
|
|
||||||
return &Verifier{
|
|
||||||
claudeBinary: claudeBinary,
|
|
||||||
model: model,
|
|
||||||
timeout: timeout,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify asks Claude whether output satisfies the skill discipline's iron laws.
|
|
||||||
// Returns Verdict{Accept: true} to accept or Verdict{Accept: false, Feedback: "..."}
|
|
||||||
// to escalate. Returns an error on subprocess failure or unparseable response.
|
|
||||||
func (v *Verifier) Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error) {
|
|
||||||
ctx, cancel := context.WithTimeout(ctx, v.timeout)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
outputJSON, err := json.Marshal(output)
|
|
||||||
if err != nil {
|
|
||||||
return Verdict{}, fmt.Errorf("verifier: marshal output: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
prompt := fmt.Sprintf(`You are a quality verifier for an AI supervisor system.
|
|
||||||
|
|
||||||
Given the skill discipline, the original task, and the generated output, decide whether the output satisfies the discipline's iron laws and output contract.
|
|
||||||
|
|
||||||
Reply with JSON only — no other text:
|
|
||||||
{"accept": true, "feedback": ""}
|
|
||||||
or
|
|
||||||
{"accept": false, "feedback": "<one sentence reason>"}
|
|
||||||
|
|
||||||
## Skill discipline
|
|
||||||
%s
|
|
||||||
|
|
||||||
## Original task
|
|
||||||
%s
|
|
||||||
|
|
||||||
## Generated output
|
|
||||||
%s`, skillPrompt, taskPrompt, string(outputJSON))
|
|
||||||
|
|
||||||
args := []string{
|
|
||||||
"--print",
|
|
||||||
"--permission-mode", "bypassPermissions",
|
|
||||||
}
|
|
||||||
if v.model != "" {
|
|
||||||
args = append(args, "--model", v.model)
|
|
||||||
}
|
|
||||||
args = append(args, prompt)
|
|
||||||
|
|
||||||
cmd := exec.CommandContext(ctx, v.claudeBinary, args...)
|
|
||||||
cmd.Env = os.Environ()
|
|
||||||
var stdout, stderr bytes.Buffer
|
|
||||||
cmd.Stdout = &stdout
|
|
||||||
cmd.Stderr = &stderr
|
|
||||||
|
|
||||||
if err := cmd.Run(); err != nil {
|
|
||||||
if ctx.Err() != nil {
|
|
||||||
return Verdict{}, fmt.Errorf("verifier: timeout after %s", v.timeout)
|
|
||||||
}
|
|
||||||
return Verdict{}, fmt.Errorf("verifier: claude exited with error: %w — stderr: %s", err, stderr.String())
|
|
||||||
}
|
|
||||||
|
|
||||||
var verdict Verdict
|
|
||||||
if err := json.Unmarshal(bytes.TrimSpace(stdout.Bytes()), &verdict); err != nil {
|
|
||||||
return Verdict{}, fmt.Errorf("verifier: parse verdict JSON: %w — raw: %s", err, stdout.String())
|
|
||||||
}
|
|
||||||
return verdict, nil
|
|
||||||
}
|
|
||||||
@@ -1,74 +0,0 @@
|
|||||||
package exec_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
)
|
|
||||||
|
|
||||||
func fakeVerifierClaude(t *testing.T, verdict iexec.Verdict) string {
|
|
||||||
t.Helper()
|
|
||||||
data, err := json.Marshal(verdict)
|
|
||||||
require.NoError(t, err)
|
|
||||||
dir := t.TempDir()
|
|
||||||
script := filepath.Join(dir, "claude")
|
|
||||||
content := fmt.Sprintf("#!/bin/sh\necho '%s'\n", string(data))
|
|
||||||
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
|
||||||
return script
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestVerifierAccepts(t *testing.T) {
|
|
||||||
claude := fakeVerifierClaude(t, iexec.Verdict{Accept: true, Feedback: ""})
|
|
||||||
v := iexec.NewVerifier(claude, "claude-sonnet-4-6", 5*time.Second)
|
|
||||||
|
|
||||||
verdict, err := v.Verify(context.Background(), "skill rules", "do the task", iexec.Result{
|
|
||||||
Status: "pass", Phase: "review", Skill: "review", Message: "ok",
|
|
||||||
})
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.True(t, verdict.Accept)
|
|
||||||
assert.Empty(t, verdict.Feedback)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestVerifierEscalates(t *testing.T) {
|
|
||||||
claude := fakeVerifierClaude(t, iexec.Verdict{Accept: false, Feedback: "missing line references"})
|
|
||||||
v := iexec.NewVerifier(claude, "claude-sonnet-4-6", 5*time.Second)
|
|
||||||
|
|
||||||
verdict, err := v.Verify(context.Background(), "skill rules", "do the task", iexec.Result{
|
|
||||||
Status: "pass", Phase: "review", Skill: "review", Message: "incomplete",
|
|
||||||
})
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.False(t, verdict.Accept)
|
|
||||||
assert.Equal(t, "missing line references", verdict.Feedback)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestVerifierErrorOnUnparsableOutput(t *testing.T) {
|
|
||||||
dir := t.TempDir()
|
|
||||||
script := filepath.Join(dir, "claude")
|
|
||||||
require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\necho 'not json'\n"), 0755))
|
|
||||||
|
|
||||||
v := iexec.NewVerifier(script, "claude-sonnet-4-6", 5*time.Second)
|
|
||||||
_, err := v.Verify(context.Background(), "rules", "task", iexec.Result{
|
|
||||||
Status: "pass", Phase: "review", Skill: "review", Message: "ok",
|
|
||||||
})
|
|
||||||
assert.Error(t, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestVerifierErrorOnNonZeroExit(t *testing.T) {
|
|
||||||
dir := t.TempDir()
|
|
||||||
script := filepath.Join(dir, "claude")
|
|
||||||
require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\nexit 1\n"), 0755))
|
|
||||||
|
|
||||||
v := iexec.NewVerifier(script, "claude-sonnet-4-6", 5*time.Second)
|
|
||||||
_, err := v.Verify(context.Background(), "rules", "task", iexec.Result{
|
|
||||||
Status: "pass", Phase: "review", Skill: "review", Message: "ok",
|
|
||||||
})
|
|
||||||
assert.Error(t, err)
|
|
||||||
}
|
|
||||||
@@ -36,3 +36,21 @@ func FormatHistory(entries []Entry, excludePhase string) string {
|
|||||||
}
|
}
|
||||||
return b.String()
|
return b.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PrependHistory reads the session log for sessionID and prepends a formatted
|
||||||
|
// history block to task. Returns task unchanged if sessionID or sessionsDir is
|
||||||
|
// empty, or if no prior entries exist.
|
||||||
|
func PrependHistory(sessionsDir, sessionID, currentPhase, task string) string {
|
||||||
|
if sessionID == "" || sessionsDir == "" {
|
||||||
|
return task
|
||||||
|
}
|
||||||
|
entries, err := Read(sessionsDir, sessionID)
|
||||||
|
if err != nil || len(entries) == 0 {
|
||||||
|
return task
|
||||||
|
}
|
||||||
|
history := FormatHistory(entries, currentPhase)
|
||||||
|
if history == "" {
|
||||||
|
return task
|
||||||
|
}
|
||||||
|
return history + "\n---\n\n" + task
|
||||||
|
}
|
||||||
|
|||||||
@@ -2,11 +2,13 @@
|
|||||||
package session_test
|
package session_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/mathiasbq/supervisor/internal/session"
|
"github.com/mathiasbq/supervisor/internal/session"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestFormatHistoryEmpty(t *testing.T) {
|
func TestFormatHistoryEmpty(t *testing.T) {
|
||||||
@@ -39,3 +41,45 @@ func TestFormatHistoryExcludesCurrentPhase(t *testing.T) {
|
|||||||
assert.Contains(t, result, "red done")
|
assert.Contains(t, result, "red done")
|
||||||
assert.NotContains(t, result, "green done")
|
assert.NotContains(t, result, "green done")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestPrependHistoryNoSessionID(t *testing.T) {
|
||||||
|
result := session.PrependHistory("", "", "review", "do the task")
|
||||||
|
assert.Equal(t, "do the task", result)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPrependHistoryNoLog(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
result := session.PrependHistory(dir, "sess-abc", "review", "do the task")
|
||||||
|
assert.Equal(t, "do the task", result)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPrependHistoryPrependsHistory(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
entry := session.Entry{
|
||||||
|
SessionID: "sess-abc", Skill: "tdd", Phase: "red",
|
||||||
|
FinalStatus: "pass", Message: "wrote test",
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
}
|
||||||
|
require.NoError(t, session.Append(dir, "sess-abc", entry))
|
||||||
|
|
||||||
|
result := session.PrependHistory(dir, "sess-abc", "review", "do the task")
|
||||||
|
assert.Contains(t, result, "## Session history")
|
||||||
|
assert.Contains(t, result, "wrote test")
|
||||||
|
assert.True(t, strings.HasSuffix(result, "do the task"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPrependHistoryExcludesCurrentPhase(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, session.Append(dir, "sess-abc", session.Entry{
|
||||||
|
SessionID: "sess-abc", Skill: "tdd", Phase: "red",
|
||||||
|
FinalStatus: "pass", Message: "red done", Timestamp: time.Now(),
|
||||||
|
}))
|
||||||
|
require.NoError(t, session.Append(dir, "sess-abc", session.Entry{
|
||||||
|
SessionID: "sess-abc", Skill: "tdd", Phase: "green",
|
||||||
|
FinalStatus: "pass", Message: "green done", Timestamp: time.Now(),
|
||||||
|
}))
|
||||||
|
|
||||||
|
result := session.PrependHistory(dir, "sess-abc", "green", "do the task")
|
||||||
|
assert.Contains(t, result, "red done")
|
||||||
|
assert.NotContains(t, result, "green done")
|
||||||
|
}
|
||||||
|
|||||||
@@ -10,13 +10,17 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Handle dispatches brain_query and brain_write tool calls.
|
// Handle dispatches brain tool calls.
|
||||||
func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) {
|
func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) {
|
||||||
switch tool {
|
switch tool {
|
||||||
case "brain_query":
|
case "brain_query":
|
||||||
return s.query(ctx, args)
|
return s.query(ctx, args)
|
||||||
case "brain_write":
|
case "brain_write":
|
||||||
return s.write(ctx, args)
|
return s.write(ctx, args)
|
||||||
|
case "brain_ingest":
|
||||||
|
return s.ingest(ctx, args)
|
||||||
|
case "brain_search":
|
||||||
|
return s.search(ctx, args)
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("unknown brain tool: %s", tool)
|
return nil, fmt.Errorf("unknown brain tool: %s", tool)
|
||||||
}
|
}
|
||||||
@@ -59,12 +63,74 @@ func (s *Skill) write(ctx context.Context, args json.RawMessage) (json.RawMessag
|
|||||||
return s.post(ctx, "/write", a)
|
return s.post(ctx, "/write", a)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ingestArgs struct {
|
||||||
|
Content string `json:"content,omitempty"`
|
||||||
|
Source string `json:"source,omitempty"`
|
||||||
|
Path string `json:"path,omitempty"`
|
||||||
|
DryRun bool `json:"dry_run,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Skill) ingest(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
|
||||||
|
var a ingestArgs
|
||||||
|
if err := json.Unmarshal(args, &a); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse args: %w", err)
|
||||||
|
}
|
||||||
|
if s.cfg.IngestSvcURL == "" {
|
||||||
|
return nil, fmt.Errorf("brain_ingest: INGEST_SVC_URL not configured")
|
||||||
|
}
|
||||||
|
if a.Path != "" && a.Content != "" {
|
||||||
|
return nil, fmt.Errorf("path and content+source are mutually exclusive: provide one or the other")
|
||||||
|
}
|
||||||
|
if a.Path != "" {
|
||||||
|
return s.postTo(ctx, s.cfg.IngestSvcURL+"/ingest-path", map[string]any{
|
||||||
|
"path": a.Path,
|
||||||
|
"source": a.Source,
|
||||||
|
"dry_run": a.DryRun,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
if a.Content != "" && a.Source != "" {
|
||||||
|
return s.postTo(ctx, s.cfg.IngestSvcURL+"/ingest", map[string]any{
|
||||||
|
"content": a.Content,
|
||||||
|
"source": a.Source,
|
||||||
|
"dry_run": a.DryRun,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("either content+source or path is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
type searchArgs struct {
|
||||||
|
Query string `json:"query"`
|
||||||
|
Collection string `json:"collection,omitempty"`
|
||||||
|
Limit int `json:"limit,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Skill) search(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
|
||||||
|
var a searchArgs
|
||||||
|
if err := json.Unmarshal(args, &a); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse args: %w", err)
|
||||||
|
}
|
||||||
|
if a.Query == "" {
|
||||||
|
return nil, fmt.Errorf("query is required")
|
||||||
|
}
|
||||||
|
if a.Limit == 0 {
|
||||||
|
a.Limit = 5
|
||||||
|
}
|
||||||
|
if s.cfg.KBRetrievalURL == "" {
|
||||||
|
return nil, fmt.Errorf("brain_search: KB_RETRIEVAL_URL not configured")
|
||||||
|
}
|
||||||
|
return s.postTo(ctx, s.cfg.KBRetrievalURL+"/api/v1/search", a)
|
||||||
|
}
|
||||||
|
|
||||||
func (s *Skill) post(ctx context.Context, path string, body any) (json.RawMessage, error) {
|
func (s *Skill) post(ctx context.Context, path string, body any) (json.RawMessage, error) {
|
||||||
|
return s.postTo(ctx, s.cfg.IngestBaseURL+path, body)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Skill) postTo(ctx context.Context, url string, body any) (json.RawMessage, error) {
|
||||||
b, err := json.Marshal(body)
|
b, err := json.Marshal(body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("marshal request: %w", err)
|
return nil, fmt.Errorf("marshal request: %w", err)
|
||||||
}
|
}
|
||||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, s.cfg.IngestBaseURL+path, bytes.NewReader(b))
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(b))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("build request: %w", err)
|
return nil, fmt.Errorf("build request: %w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -63,3 +63,60 @@ func TestHandle_UnknownTool_ReturnsError(t *testing.T) {
|
|||||||
_, err := s.Handle(context.Background(), "brain_unknown", nil)
|
_, err := s.Handle(context.Background(), "brain_unknown", nil)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestIngest_RoutesToIngestPath(t *testing.T) {
|
||||||
|
var capturedPath string
|
||||||
|
var capturedBody map[string]any
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
capturedPath = r.URL.Path
|
||||||
|
require.NoError(t, json.NewDecoder(r.Body).Decode(&capturedBody))
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{"pages": []string{"wiki/foo.md"}})
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
s := brain.New(brain.Config{IngestSvcURL: srv.URL})
|
||||||
|
args, _ := json.Marshal(map[string]any{"path": "/tmp/some-file.md"})
|
||||||
|
out, err := s.Handle(context.Background(), "brain_ingest", args)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
assert.Equal(t, "/ingest-path", capturedPath)
|
||||||
|
assert.Equal(t, "/tmp/some-file.md", capturedBody["path"])
|
||||||
|
|
||||||
|
var result map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(out, &result))
|
||||||
|
pages := result["pages"].([]any)
|
||||||
|
assert.Len(t, pages, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIngest_RoutesToIngest(t *testing.T) {
|
||||||
|
var capturedPath string
|
||||||
|
var capturedBody map[string]any
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
capturedPath = r.URL.Path
|
||||||
|
require.NoError(t, json.NewDecoder(r.Body).Decode(&capturedBody))
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{"pages": []string{"wiki/bar.md"}})
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
s := brain.New(brain.Config{IngestSvcURL: srv.URL})
|
||||||
|
args, _ := json.Marshal(map[string]any{"content": "some content", "source": "my-source.md"})
|
||||||
|
out, err := s.Handle(context.Background(), "brain_ingest", args)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
assert.Equal(t, "/ingest", capturedPath)
|
||||||
|
assert.Equal(t, "some content", capturedBody["content"])
|
||||||
|
assert.Equal(t, "my-source.md", capturedBody["source"])
|
||||||
|
|
||||||
|
var result map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(out, &result))
|
||||||
|
pages := result["pages"].([]any)
|
||||||
|
assert.Len(t, pages, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIngest_MissingRequiredFields(t *testing.T) {
|
||||||
|
s := brain.New(brain.Config{IngestSvcURL: "http://localhost:3300"})
|
||||||
|
args, _ := json.Marshal(map[string]any{})
|
||||||
|
_, err := s.Handle(context.Background(), "brain_ingest", args)
|
||||||
|
require.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "either content+source or path is required")
|
||||||
|
}
|
||||||
|
|||||||
@@ -9,7 +9,9 @@ import (
|
|||||||
|
|
||||||
// Config holds brain skill configuration.
|
// Config holds brain skill configuration.
|
||||||
type Config struct {
|
type Config struct {
|
||||||
IngestBaseURL string // base URL of the ingestion HTTP server, e.g. http://localhost:3300
|
IngestBaseURL string // base URL of the ingestion HTTP server (brain_query, brain_write)
|
||||||
|
IngestSvcURL string // base URL of the ingestion-svc HTTP server (brain_ingest)
|
||||||
|
KBRetrievalURL string // base URL of the kb-retrieval server (brain_search)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skill implements registry.Skill for brain_query and brain_write.
|
// Skill implements registry.Skill for brain_query and brain_write.
|
||||||
@@ -32,10 +34,10 @@ func (s *Skill) Tools() []registry.ToolDef {
|
|||||||
str := map[string]any{"type": "string"}
|
str := map[string]any{"type": "string"}
|
||||||
num := map[string]any{"type": "integer"}
|
num := map[string]any{"type": "integer"}
|
||||||
|
|
||||||
return []registry.ToolDef{
|
tools := []registry.ToolDef{
|
||||||
{
|
{
|
||||||
Name: "brain_query",
|
Name: "brain_query",
|
||||||
Description: "Search the hyperguild brain wiki for relevant knowledge. Call this before starting any significant task.",
|
Description: "BM25 full-text search across brain/knowledge/ and brain/wiki/ markdown files. Fast, no embeddings needed. Call before any significant task.",
|
||||||
InputSchema: schema([]string{"query"}, map[string]any{
|
InputSchema: schema([]string{"query"}, map[string]any{
|
||||||
"query": str,
|
"query": str,
|
||||||
"limit": num,
|
"limit": num,
|
||||||
@@ -43,7 +45,7 @@ func (s *Skill) Tools() []registry.ToolDef {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "brain_write",
|
Name: "brain_write",
|
||||||
Description: "Write a raw knowledge note to the brain for later ingestion into the wiki.",
|
Description: "Write a raw knowledge note to brain/knowledge/ for later ingestion.",
|
||||||
InputSchema: schema([]string{"content"}, map[string]any{
|
InputSchema: schema([]string{"content"}, map[string]any{
|
||||||
"content": str,
|
"content": str,
|
||||||
"type": str,
|
"type": str,
|
||||||
@@ -52,4 +54,32 @@ func (s *Skill) Tools() []registry.ToolDef {
|
|||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
if s.cfg.IngestSvcURL != "" {
|
||||||
|
tools = append(tools, registry.ToolDef{
|
||||||
|
Name: "brain_ingest",
|
||||||
|
Description: "Ingest content into the brain wiki (brain/wiki/). Calls an LLM to produce structured wiki pages. " +
|
||||||
|
"Use for substantial documents, articles, or knowledge worth structuring. " +
|
||||||
|
"Provide EITHER (a) path — absolute path to a file or directory, " +
|
||||||
|
"OR (b) content + source — raw text and a human-readable name. " +
|
||||||
|
"Providing both is an error. Returns the list of wiki pages written.",
|
||||||
|
InputSchema: schema([]string{}, map[string]any{
|
||||||
|
"content": map[string]any{"type": "string", "description": "raw text to ingest; required when path is not set"},
|
||||||
|
"source": map[string]any{"type": "string", "description": "human-readable name for the content, e.g. 'shape-up-book'; required when path is not set"},
|
||||||
|
"path": map[string]any{"type": "string", "description": "absolute path to a file or directory to ingest; mutually exclusive with content+source"},
|
||||||
|
"dry_run": map[string]any{"type": "boolean"},
|
||||||
|
}),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
if s.cfg.KBRetrievalURL != "" {
|
||||||
|
tools = append(tools, registry.ToolDef{
|
||||||
|
Name: "brain_search",
|
||||||
|
Description: "Semantic vector search across the brain wiki using embeddings. Use when brain_query returns no results or you need conceptually-related results rather than keyword matches.",
|
||||||
|
InputSchema: schema([]string{"query"}, map[string]any{
|
||||||
|
"query": str,
|
||||||
|
"collection": str,
|
||||||
|
"limit": num,
|
||||||
|
}),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return tools
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,8 +5,9 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
"github.com/mathiasbq/supervisor/internal/brain"
|
||||||
"github.com/mathiasbq/supervisor/internal/session"
|
"github.com/mathiasbq/supervisor/internal/session"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -39,42 +40,43 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
|||||||
model = s.cfg.DefaultModel
|
model = s.cfg.DefaultModel
|
||||||
}
|
}
|
||||||
|
|
||||||
|
brainCtx, _ := brain.Query(ctx, s.cfg.IngestBaseURL, a.Error+" "+a.Context, 3)
|
||||||
|
|
||||||
task := fmt.Sprintf(
|
task := fmt.Sprintf(
|
||||||
"phase: debug\nproject_root: %s\nerror: %s\ncontext: %s\nmodel: %s",
|
"phase: debug\nproject_root: %s\nerror: %s\ncontext: %s\nmodel: %s",
|
||||||
a.ProjectRoot, a.Error, a.Context, model,
|
a.ProjectRoot, a.Error, a.Context, model,
|
||||||
)
|
)
|
||||||
task = s.prependHistory(a.SessionID, "debug", task)
|
task = session.PrependHistory(s.cfg.SessionsDir, a.SessionID, "debug", task)
|
||||||
|
if brainCtx != "" {
|
||||||
|
task = brainCtx + "\n---\n\n" + task
|
||||||
|
}
|
||||||
|
|
||||||
if s.cfg.ExecutorFn == nil {
|
if s.cfg.CompleteFunc == nil {
|
||||||
return nil, fmt.Errorf("no executor configured")
|
return nil, fmt.Errorf("no executor configured")
|
||||||
}
|
}
|
||||||
result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
t0 := time.Now()
|
||||||
SkillPrompt: s.cfg.SkillPrompt,
|
text, dur, err := s.cfg.CompleteFunc(ctx, model, s.cfg.SkillPrompt, task)
|
||||||
TaskPrompt: task,
|
|
||||||
Model: model,
|
|
||||||
Tools: "Read,Bash",
|
|
||||||
})
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
b, err := json.Marshal(result)
|
|
||||||
if err != nil {
|
if a.SessionID != "" && s.cfg.SessionsDir != "" {
|
||||||
return nil, fmt.Errorf("marshal result: %w", err)
|
msg := text
|
||||||
|
if len(msg) > 200 {
|
||||||
|
msg = msg[:200]
|
||||||
}
|
}
|
||||||
return b, nil
|
_ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{
|
||||||
|
SessionID: a.SessionID,
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
Skill: "debug",
|
||||||
|
Phase: "debug",
|
||||||
|
ProjectRoot: a.ProjectRoot,
|
||||||
|
FinalStatus: "ok",
|
||||||
|
ModelUsed: model,
|
||||||
|
DurationMs: time.Since(t0).Milliseconds(),
|
||||||
|
Message: msg,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
|
return json.Marshal(map[string]any{"text": text, "model": model, "duration_ms": dur})
|
||||||
if sessionID == "" || s.cfg.SessionsDir == "" {
|
|
||||||
return task
|
|
||||||
}
|
|
||||||
entries, err := session.Read(s.cfg.SessionsDir, sessionID)
|
|
||||||
if err != nil || len(entries) == 0 {
|
|
||||||
return task
|
|
||||||
}
|
|
||||||
history := session.FormatHistory(entries, currentPhase)
|
|
||||||
if history == "" {
|
|
||||||
return task
|
|
||||||
}
|
|
||||||
return history + "\n---\n\n" + task
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/debug"
|
"github.com/mathiasbq/supervisor/internal/skills/debug"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
@@ -33,29 +32,22 @@ func TestDebugRequiresError(t *testing.T) {
|
|||||||
assert.ErrorContains(t, err, "error")
|
assert.ErrorContains(t, err, "error")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDebugCallsExecutor(t *testing.T) {
|
func TestDebugCallsCompleteFunc(t *testing.T) {
|
||||||
called := false
|
|
||||||
var capturedTask string
|
var capturedTask string
|
||||||
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
|
||||||
called = true
|
capturedTask = user
|
||||||
capturedTask = req.TaskPrompt
|
return "HYPOTHESIS 1 (high): nil map access. Verify: go test ./...", 90, nil
|
||||||
return iexec.Result{
|
|
||||||
Status: "pass", Phase: "debug", Skill: "debug",
|
|
||||||
RunnerOutput: "HYPOTHESIS 1 (likelihood: high): nil map access\nVERIFY: go test ./... → expected: panic line reference",
|
|
||||||
Verified: false, ModelUsed: "self", Message: "3 hypotheses for: panic nil pointer at foo.go:42",
|
|
||||||
}, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sk := debug.New(debug.Config{SkillPrompt: "debug rules", ExecutorFn: fakeFn, SessionsDir: t.TempDir()})
|
sk := debug.New(debug.Config{SkillPrompt: "debug rules", CompleteFunc: fakeFn, SessionsDir: t.TempDir()})
|
||||||
out, err := sk.Handle(context.Background(), "debug", json.RawMessage(
|
out, err := sk.Handle(context.Background(), "debug", json.RawMessage(
|
||||||
`{"project_root":"/tmp/proj","error":"panic: nil pointer dereference at foo.go:42","context":"occurs on startup"}`,
|
`{"project_root":"/tmp/proj","error":"panic: nil pointer dereference at foo.go:42","context":"occurs on startup"}`,
|
||||||
))
|
))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.True(t, called)
|
|
||||||
assert.Contains(t, capturedTask, "panic: nil pointer dereference")
|
assert.Contains(t, capturedTask, "panic: nil pointer dereference")
|
||||||
assert.Contains(t, capturedTask, "occurs on startup")
|
assert.Contains(t, capturedTask, "occurs on startup")
|
||||||
|
|
||||||
var result iexec.Result
|
var result map[string]any
|
||||||
require.NoError(t, json.Unmarshal(out, &result))
|
require.NoError(t, json.Unmarshal(out, &result))
|
||||||
assert.Equal(t, "debug", result.Phase)
|
assert.Contains(t, result["text"], "nil map access")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,19 +5,19 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/registry"
|
"github.com/mathiasbq/supervisor/internal/registry"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ExecutorFn is the function signature for running a worker subprocess.
|
// CompleteFunc is the function used to call a local model.
|
||||||
type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
|
type CompleteFunc func(ctx context.Context, model, system, user string) (string, int64, error)
|
||||||
|
|
||||||
// Config holds dependencies for the debug skill.
|
// Config holds dependencies for the debug skill.
|
||||||
type Config struct {
|
type Config struct {
|
||||||
SkillPrompt string
|
SkillPrompt string
|
||||||
DefaultModel string
|
DefaultModel string
|
||||||
ExecutorFn ExecutorFn
|
CompleteFunc CompleteFunc
|
||||||
SessionsDir string
|
SessionsDir string
|
||||||
|
IngestBaseURL string
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skill implements the debug MCP tool.
|
// Skill implements the debug MCP tool.
|
||||||
@@ -39,7 +39,7 @@ func (s *Skill) Tools() []registry.ToolDef {
|
|||||||
return []registry.ToolDef{
|
return []registry.ToolDef{
|
||||||
{
|
{
|
||||||
Name: "debug",
|
Name: "debug",
|
||||||
Description: "Analyse an error and return 3-5 hypotheses ordered by likelihood, each with a concrete verification step.",
|
Description: "Consult a local model to analyse an error and return hypotheses ordered by likelihood, each with a concrete verification step.",
|
||||||
InputSchema: schema(
|
InputSchema: schema(
|
||||||
[]string{"project_root", "error"},
|
[]string{"project_root", "error"},
|
||||||
map[string]any{
|
map[string]any{
|
||||||
|
|||||||
@@ -5,8 +5,8 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/session"
|
"github.com/mathiasbq/supervisor/internal/session"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -33,7 +33,6 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
|||||||
model = s.cfg.DefaultModel
|
model = s.cfg.DefaultModel
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read session log entries (empty slice if no log exists yet).
|
|
||||||
entries, err := session.Read(s.cfg.SessionsDir, a.SessionID)
|
entries, err := session.Read(s.cfg.SessionsDir, a.SessionID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("read session log: %w", err)
|
return nil, fmt.Errorf("read session log: %w", err)
|
||||||
@@ -45,26 +44,33 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
|||||||
}
|
}
|
||||||
|
|
||||||
taskPrompt := fmt.Sprintf(
|
taskPrompt := fmt.Sprintf(
|
||||||
"SESSION_ID: %s\n\nSESSION_LOG:\n%s\n\nReview this session log. Identify what is novel or worth preserving as organizational knowledge. Write structured entries to brain/raw/ via brain_write. Return JSON result when done.",
|
"SESSION_ID: %s\n\nSESSION_LOG:\n%s\n\nReview this session log. Identify what is novel or worth preserving as organizational knowledge. Provide structured insights.",
|
||||||
a.SessionID, string(logJSON),
|
a.SessionID, string(logJSON),
|
||||||
)
|
)
|
||||||
|
|
||||||
if s.cfg.ExecutorFn == nil {
|
if s.cfg.CompleteFunc == nil {
|
||||||
return nil, fmt.Errorf("no executor configured")
|
return nil, fmt.Errorf("no executor configured")
|
||||||
}
|
}
|
||||||
result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
t0 := time.Now()
|
||||||
SkillPrompt: s.cfg.SkillPrompt,
|
text, dur, err := s.cfg.CompleteFunc(ctx, model, s.cfg.SkillPrompt, taskPrompt)
|
||||||
TaskPrompt: taskPrompt,
|
|
||||||
Model: model,
|
|
||||||
Tools: "Bash,Read,Write",
|
|
||||||
})
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("retrospective worker: %w", err)
|
return nil, fmt.Errorf("retrospective model: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
b, err := json.Marshal(result)
|
msg := text
|
||||||
if err != nil {
|
if len(msg) > 200 {
|
||||||
return nil, fmt.Errorf("marshal result: %w", err)
|
msg = msg[:200]
|
||||||
}
|
}
|
||||||
return b, nil
|
_ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{
|
||||||
|
SessionID: a.SessionID,
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
Skill: "retrospective",
|
||||||
|
Phase: "retrospective",
|
||||||
|
FinalStatus: "ok",
|
||||||
|
ModelUsed: model,
|
||||||
|
DurationMs: time.Since(t0).Milliseconds(),
|
||||||
|
Message: msg,
|
||||||
|
})
|
||||||
|
|
||||||
|
return json.Marshal(map[string]any{"text": text, "model": model, "duration_ms": dur})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/retrospective"
|
"github.com/mathiasbq/supervisor/internal/skills/retrospective"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
@@ -20,20 +19,14 @@ func TestHandle_Retrospective_RequiresSessionID(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestHandle_Retrospective_BuildsPromptWithSessionLog(t *testing.T) {
|
func TestHandle_Retrospective_BuildsPromptWithSessionLog(t *testing.T) {
|
||||||
var capturedReq iexec.Request
|
var capturedTask string
|
||||||
s := retrospective.New(retrospective.Config{
|
s := retrospective.New(retrospective.Config{
|
||||||
SkillPrompt: "retrospective discipline",
|
SkillPrompt: "retrospective discipline",
|
||||||
DefaultModel: "ollama/test",
|
DefaultModel: "ollama/test",
|
||||||
SessionsDir: t.TempDir(), // empty dir, no session file — that's OK, session.Read returns nil
|
SessionsDir: t.TempDir(),
|
||||||
ExecutorFn: func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
CompleteFunc: func(_ context.Context, _, _, user string) (string, int64, error) {
|
||||||
capturedReq = req
|
capturedTask = user
|
||||||
return iexec.Result{
|
return "Key insight: the team resolved a tricky nil pointer issue via careful logging.", 75, nil
|
||||||
Status: "pass",
|
|
||||||
Phase: "retrospective",
|
|
||||||
Skill: "retrospective",
|
|
||||||
Verified: true,
|
|
||||||
Message: "wrote 2 entries to brain",
|
|
||||||
}, nil
|
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -41,9 +34,8 @@ func TestHandle_Retrospective_BuildsPromptWithSessionLog(t *testing.T) {
|
|||||||
out, err := s.Handle(context.Background(), "retrospective", args)
|
out, err := s.Handle(context.Background(), "retrospective", args)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
var result iexec.Result
|
var result map[string]any
|
||||||
require.NoError(t, json.Unmarshal(out, &result))
|
require.NoError(t, json.Unmarshal(out, &result))
|
||||||
assert.Equal(t, "pass", result.Status)
|
assert.Contains(t, result["text"], "nil pointer")
|
||||||
assert.Contains(t, capturedReq.SkillPrompt, "retrospective discipline")
|
assert.Contains(t, capturedTask, "empty-session")
|
||||||
assert.Contains(t, capturedReq.TaskPrompt, "empty-session")
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,19 +5,18 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/registry"
|
"github.com/mathiasbq/supervisor/internal/registry"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ExecutorFn allows injecting a test double for the subprocess executor.
|
// CompleteFunc is the function used to call a local model.
|
||||||
type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
|
type CompleteFunc func(ctx context.Context, model, system, user string) (string, int64, error)
|
||||||
|
|
||||||
// Config holds retrospective skill configuration.
|
// Config holds retrospective skill configuration.
|
||||||
type Config struct {
|
type Config struct {
|
||||||
SkillPrompt string // content of retrospective.md
|
SkillPrompt string
|
||||||
DefaultModel string // model to use when not specified in args
|
DefaultModel string
|
||||||
SessionsDir string // path to brain/sessions/
|
SessionsDir string
|
||||||
ExecutorFn ExecutorFn // injected executor
|
CompleteFunc CompleteFunc
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skill implements registry.Skill for the retrospective tool.
|
// Skill implements registry.Skill for the retrospective tool.
|
||||||
@@ -36,7 +35,7 @@ func (s *Skill) Tools() []registry.ToolDef {
|
|||||||
return []registry.ToolDef{
|
return []registry.ToolDef{
|
||||||
{
|
{
|
||||||
Name: "retrospective",
|
Name: "retrospective",
|
||||||
Description: "Run a retrospective on a completed session. Reads the session log, identifies novel learnings, and writes structured entries to the brain for ingestion. Call at the end of each coding session.",
|
Description: "Consult a local model to analyse a completed session and identify what is novel or worth preserving as organizational knowledge.",
|
||||||
InputSchema: json.RawMessage(`{
|
InputSchema: json.RawMessage(`{
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"required": ["session_id"],
|
"required": ["session_id"],
|
||||||
|
|||||||
@@ -6,8 +6,9 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
"github.com/mathiasbq/supervisor/internal/brain"
|
||||||
"github.com/mathiasbq/supervisor/internal/session"
|
"github.com/mathiasbq/supervisor/internal/session"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -40,42 +41,43 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
|||||||
model = s.cfg.DefaultModel
|
model = s.cfg.DefaultModel
|
||||||
}
|
}
|
||||||
|
|
||||||
|
brainCtx, _ := brain.Query(ctx, s.cfg.IngestBaseURL, strings.Join(a.Files, " ")+" "+a.Context, 3)
|
||||||
|
|
||||||
task := fmt.Sprintf(
|
task := fmt.Sprintf(
|
||||||
"phase: review\nproject_root: %s\nfiles: %s\ncontext: %s\nmodel: %s",
|
"phase: review\nproject_root: %s\nfiles: %s\ncontext: %s\nmodel: %s",
|
||||||
a.ProjectRoot, strings.Join(a.Files, ", "), a.Context, model,
|
a.ProjectRoot, strings.Join(a.Files, ", "), a.Context, model,
|
||||||
)
|
)
|
||||||
task = s.prependHistory(a.SessionID, "review", task)
|
task = session.PrependHistory(s.cfg.SessionsDir, a.SessionID, "review", task)
|
||||||
|
if brainCtx != "" {
|
||||||
|
task = brainCtx + "\n---\n\n" + task
|
||||||
|
}
|
||||||
|
|
||||||
if s.cfg.ExecutorFn == nil {
|
if s.cfg.CompleteFunc == nil {
|
||||||
return nil, fmt.Errorf("no executor configured")
|
return nil, fmt.Errorf("no executor configured")
|
||||||
}
|
}
|
||||||
result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
t0 := time.Now()
|
||||||
SkillPrompt: s.cfg.SkillPrompt,
|
text, dur, err := s.cfg.CompleteFunc(ctx, model, s.cfg.SkillPrompt, task)
|
||||||
TaskPrompt: task,
|
|
||||||
Model: model,
|
|
||||||
Tools: "Read,Bash",
|
|
||||||
})
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
b, err := json.Marshal(result)
|
|
||||||
if err != nil {
|
if a.SessionID != "" && s.cfg.SessionsDir != "" {
|
||||||
return nil, fmt.Errorf("marshal result: %w", err)
|
msg := text
|
||||||
|
if len(msg) > 200 {
|
||||||
|
msg = msg[:200]
|
||||||
}
|
}
|
||||||
return b, nil
|
_ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{
|
||||||
|
SessionID: a.SessionID,
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
Skill: "review",
|
||||||
|
Phase: "review",
|
||||||
|
ProjectRoot: a.ProjectRoot,
|
||||||
|
FinalStatus: "ok",
|
||||||
|
ModelUsed: model,
|
||||||
|
DurationMs: time.Since(t0).Milliseconds(),
|
||||||
|
Message: msg,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
|
return json.Marshal(map[string]any{"text": text, "model": model, "duration_ms": dur})
|
||||||
if sessionID == "" || s.cfg.SessionsDir == "" {
|
|
||||||
return task
|
|
||||||
}
|
|
||||||
entries, err := session.Read(s.cfg.SessionsDir, sessionID)
|
|
||||||
if err != nil || len(entries) == 0 {
|
|
||||||
return task
|
|
||||||
}
|
|
||||||
history := session.FormatHistory(entries, currentPhase)
|
|
||||||
if history == "" {
|
|
||||||
return task
|
|
||||||
}
|
|
||||||
return history + "\n---\n\n" + task
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/review"
|
"github.com/mathiasbq/supervisor/internal/skills/review"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
@@ -33,29 +32,22 @@ func TestReviewRequiresFiles(t *testing.T) {
|
|||||||
assert.ErrorContains(t, err, "files")
|
assert.ErrorContains(t, err, "files")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestReviewCallsExecutor(t *testing.T) {
|
func TestReviewCallsCompleteFunc(t *testing.T) {
|
||||||
called := false
|
|
||||||
var capturedTask string
|
var capturedTask string
|
||||||
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
|
||||||
called = true
|
capturedTask = user
|
||||||
capturedTask = req.TaskPrompt
|
return "2 warnings found: missing error handling at line 42", 80, nil
|
||||||
return iexec.Result{
|
|
||||||
Status: "pass", Phase: "review", Skill: "review",
|
|
||||||
Verified: true, ModelUsed: "self", Message: "2 warnings found",
|
|
||||||
}, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sk := review.New(review.Config{SkillPrompt: "review rules", ExecutorFn: fakeFn, SessionsDir: t.TempDir()})
|
sk := review.New(review.Config{SkillPrompt: "review rules", CompleteFunc: fakeFn, SessionsDir: t.TempDir()})
|
||||||
out, err := sk.Handle(context.Background(), "review", json.RawMessage(
|
out, err := sk.Handle(context.Background(), "review", json.RawMessage(
|
||||||
`{"project_root":"/tmp/proj","files":["internal/foo/foo.go"],"context":"PR: add Foo helper"}`,
|
`{"project_root":"/tmp/proj","files":["internal/foo/foo.go"],"context":"PR: add Foo helper"}`,
|
||||||
))
|
))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.True(t, called)
|
|
||||||
assert.Contains(t, capturedTask, "internal/foo/foo.go")
|
assert.Contains(t, capturedTask, "internal/foo/foo.go")
|
||||||
assert.Contains(t, capturedTask, "PR: add Foo helper")
|
assert.Contains(t, capturedTask, "PR: add Foo helper")
|
||||||
|
|
||||||
var result iexec.Result
|
var result map[string]any
|
||||||
require.NoError(t, json.Unmarshal(out, &result))
|
require.NoError(t, json.Unmarshal(out, &result))
|
||||||
assert.Equal(t, "pass", result.Status)
|
assert.Contains(t, result["text"], "2 warnings found")
|
||||||
assert.Equal(t, "review", result.Phase)
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,19 +5,19 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/registry"
|
"github.com/mathiasbq/supervisor/internal/registry"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ExecutorFn is the function signature for running a worker subprocess.
|
// CompleteFunc is the function used to call a local model.
|
||||||
type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
|
type CompleteFunc func(ctx context.Context, model, system, user string) (string, int64, error)
|
||||||
|
|
||||||
// Config holds dependencies for the review skill.
|
// Config holds dependencies for the review skill.
|
||||||
type Config struct {
|
type Config struct {
|
||||||
SkillPrompt string
|
SkillPrompt string
|
||||||
DefaultModel string
|
DefaultModel string
|
||||||
ExecutorFn ExecutorFn
|
CompleteFunc CompleteFunc
|
||||||
SessionsDir string
|
SessionsDir string
|
||||||
|
IngestBaseURL string
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skill implements the review MCP tool.
|
// Skill implements the review MCP tool.
|
||||||
@@ -39,7 +39,7 @@ func (s *Skill) Tools() []registry.ToolDef {
|
|||||||
return []registry.ToolDef{
|
return []registry.ToolDef{
|
||||||
{
|
{
|
||||||
Name: "review",
|
Name: "review",
|
||||||
Description: "Perform a structured code review of the specified files. Returns findings with severity levels.",
|
Description: "Consult a local model for a structured code review of the specified files. Returns findings with severity levels.",
|
||||||
InputSchema: schema(
|
InputSchema: schema(
|
||||||
[]string{"project_root", "files"},
|
[]string{"project_root", "files"},
|
||||||
map[string]any{
|
map[string]any{
|
||||||
|
|||||||
@@ -5,8 +5,9 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
"github.com/mathiasbq/supervisor/internal/brain"
|
||||||
"github.com/mathiasbq/supervisor/internal/session"
|
"github.com/mathiasbq/supervisor/internal/session"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -44,42 +45,43 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
|||||||
model = s.cfg.DefaultModel
|
model = s.cfg.DefaultModel
|
||||||
}
|
}
|
||||||
|
|
||||||
|
brainCtx, _ := brain.Query(ctx, s.cfg.IngestBaseURL, a.Requirements+" "+a.Context, 3)
|
||||||
|
|
||||||
task := fmt.Sprintf(
|
task := fmt.Sprintf(
|
||||||
"phase: spec\nproject_root: %s\nrequirements: %s\noutput_path: %s\ncontext: %s\nmodel: %s",
|
"phase: spec\nproject_root: %s\nrequirements: %s\noutput_path: %s\ncontext: %s\nmodel: %s",
|
||||||
a.ProjectRoot, a.Requirements, outputPath, a.Context, model,
|
a.ProjectRoot, a.Requirements, outputPath, a.Context, model,
|
||||||
)
|
)
|
||||||
task = s.prependHistory(a.SessionID, "spec", task)
|
task = session.PrependHistory(s.cfg.SessionsDir, a.SessionID, "spec", task)
|
||||||
|
if brainCtx != "" {
|
||||||
|
task = brainCtx + "\n---\n\n" + task
|
||||||
|
}
|
||||||
|
|
||||||
if s.cfg.ExecutorFn == nil {
|
if s.cfg.CompleteFunc == nil {
|
||||||
return nil, fmt.Errorf("no executor configured")
|
return nil, fmt.Errorf("no executor configured")
|
||||||
}
|
}
|
||||||
result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
t0 := time.Now()
|
||||||
SkillPrompt: s.cfg.SkillPrompt,
|
text, dur, err := s.cfg.CompleteFunc(ctx, model, s.cfg.SkillPrompt, task)
|
||||||
TaskPrompt: task,
|
|
||||||
Model: model,
|
|
||||||
Tools: "Read,Write",
|
|
||||||
})
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
b, err := json.Marshal(result)
|
|
||||||
if err != nil {
|
if a.SessionID != "" && s.cfg.SessionsDir != "" {
|
||||||
return nil, fmt.Errorf("marshal result: %w", err)
|
msg := text
|
||||||
|
if len(msg) > 200 {
|
||||||
|
msg = msg[:200]
|
||||||
}
|
}
|
||||||
return b, nil
|
_ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{
|
||||||
|
SessionID: a.SessionID,
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
Skill: "spec",
|
||||||
|
Phase: "spec",
|
||||||
|
ProjectRoot: a.ProjectRoot,
|
||||||
|
FinalStatus: "ok",
|
||||||
|
ModelUsed: model,
|
||||||
|
DurationMs: time.Since(t0).Milliseconds(),
|
||||||
|
Message: msg,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
|
return json.Marshal(map[string]any{"text": text, "model": model, "duration_ms": dur})
|
||||||
if sessionID == "" || s.cfg.SessionsDir == "" {
|
|
||||||
return task
|
|
||||||
}
|
|
||||||
entries, err := session.Read(s.cfg.SessionsDir, sessionID)
|
|
||||||
if err != nil || len(entries) == 0 {
|
|
||||||
return task
|
|
||||||
}
|
|
||||||
history := session.FormatHistory(entries, currentPhase)
|
|
||||||
if history == "" {
|
|
||||||
return task
|
|
||||||
}
|
|
||||||
return history + "\n---\n\n" + task
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/spec"
|
"github.com/mathiasbq/supervisor/internal/skills/spec"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
@@ -33,29 +32,22 @@ func TestSpecRequiresRequirements(t *testing.T) {
|
|||||||
assert.ErrorContains(t, err, "requirements")
|
assert.ErrorContains(t, err, "requirements")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestSpecCallsExecutor(t *testing.T) {
|
func TestSpecCallsCompleteFunc(t *testing.T) {
|
||||||
called := false
|
|
||||||
var capturedTask string
|
var capturedTask string
|
||||||
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
|
||||||
called = true
|
capturedTask = user
|
||||||
capturedTask = req.TaskPrompt
|
return "# OAuth2 Login Spec\n\n## Overview\nImplement OAuth2 login flow.", 110, nil
|
||||||
return iexec.Result{
|
|
||||||
Status: "pass", Phase: "spec", Skill: "spec",
|
|
||||||
FilePath: "/tmp/proj/docs/login-spec.md",
|
|
||||||
Verified: true, ModelUsed: "self", Message: "spec written: login feature",
|
|
||||||
}, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sk := spec.New(spec.Config{SkillPrompt: "spec rules", ExecutorFn: fakeFn, SessionsDir: t.TempDir()})
|
sk := spec.New(spec.Config{SkillPrompt: "spec rules", CompleteFunc: fakeFn, SessionsDir: t.TempDir()})
|
||||||
out, err := sk.Handle(context.Background(), "spec", json.RawMessage(
|
out, err := sk.Handle(context.Background(), "spec", json.RawMessage(
|
||||||
`{"project_root":"/tmp/proj","requirements":"add OAuth2 login","output_path":"docs/login-spec.md"}`,
|
`{"project_root":"/tmp/proj","requirements":"add OAuth2 login","output_path":"docs/login-spec.md"}`,
|
||||||
))
|
))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.True(t, called)
|
|
||||||
assert.Contains(t, capturedTask, "OAuth2 login")
|
assert.Contains(t, capturedTask, "OAuth2 login")
|
||||||
assert.Contains(t, capturedTask, "docs/login-spec.md")
|
assert.Contains(t, capturedTask, "docs/login-spec.md")
|
||||||
|
|
||||||
var result iexec.Result
|
var result map[string]any
|
||||||
require.NoError(t, json.Unmarshal(out, &result))
|
require.NoError(t, json.Unmarshal(out, &result))
|
||||||
assert.Equal(t, "spec", result.Phase)
|
assert.Contains(t, result["text"], "OAuth2 Login Spec")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,19 +5,19 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/registry"
|
"github.com/mathiasbq/supervisor/internal/registry"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ExecutorFn is the function signature for running a worker subprocess.
|
// CompleteFunc is the function used to call a local model.
|
||||||
type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
|
type CompleteFunc func(ctx context.Context, model, system, user string) (string, int64, error)
|
||||||
|
|
||||||
// Config holds dependencies for the spec skill.
|
// Config holds dependencies for the spec skill.
|
||||||
type Config struct {
|
type Config struct {
|
||||||
SkillPrompt string
|
SkillPrompt string
|
||||||
DefaultModel string
|
DefaultModel string
|
||||||
ExecutorFn ExecutorFn
|
CompleteFunc CompleteFunc
|
||||||
SessionsDir string
|
SessionsDir string
|
||||||
|
IngestBaseURL string
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skill implements the spec MCP tool.
|
// Skill implements the spec MCP tool.
|
||||||
@@ -39,7 +39,7 @@ func (s *Skill) Tools() []registry.ToolDef {
|
|||||||
return []registry.ToolDef{
|
return []registry.ToolDef{
|
||||||
{
|
{
|
||||||
Name: "spec",
|
Name: "spec",
|
||||||
Description: "Generate a structured implementation spec from requirements. Writes the spec to output_path in the project.",
|
Description: "Consult a local model to draft a structured implementation spec from requirements. Returns the spec text.",
|
||||||
InputSchema: schema(
|
InputSchema: schema(
|
||||||
[]string{"project_root", "requirements"},
|
[]string{"project_root", "requirements"},
|
||||||
map[string]any{
|
map[string]any{
|
||||||
|
|||||||
@@ -4,8 +4,9 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
"github.com/mathiasbq/supervisor/internal/brain"
|
||||||
"github.com/mathiasbq/supervisor/internal/session"
|
"github.com/mathiasbq/supervisor/internal/session"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -40,11 +41,16 @@ func (s *Skill) handleRed(ctx context.Context, raw json.RawMessage) (json.RawMes
|
|||||||
if args.Spec == "" {
|
if args.Spec == "" {
|
||||||
return nil, fmt.Errorf("spec is required")
|
return nil, fmt.Errorf("spec is required")
|
||||||
}
|
}
|
||||||
|
brainCtx, _ := brain.Query(ctx, s.cfg.IngestBaseURL, args.Spec, 3)
|
||||||
|
|
||||||
task := fmt.Sprintf(
|
task := fmt.Sprintf(
|
||||||
"phase: red\nproject_root: %s\nspec: %s\nmodel: %s\ntest_cmd: %s",
|
"phase: red\nproject_root: %s\nspec: %s\nmodel: %s\ntest_cmd: %s",
|
||||||
args.ProjectRoot, args.Spec, s.resolveModel(args.Model), args.TestCmd,
|
args.ProjectRoot, args.Spec, s.resolveModel(args.Model), args.TestCmd,
|
||||||
)
|
)
|
||||||
return s.execute(ctx, task)
|
if brainCtx != "" {
|
||||||
|
task = brainCtx + "\n---\n\n" + task
|
||||||
|
}
|
||||||
|
return s.complete(ctx, s.resolveModel(args.Model), task)
|
||||||
}
|
}
|
||||||
|
|
||||||
type greenArgs struct {
|
type greenArgs struct {
|
||||||
@@ -70,8 +76,15 @@ func (s *Skill) handleGreen(ctx context.Context, raw json.RawMessage) (json.RawM
|
|||||||
"phase: green\nproject_root: %s\ntest_path: %s\nmodel: %s\ntest_cmd: %s",
|
"phase: green\nproject_root: %s\ntest_path: %s\nmodel: %s\ntest_cmd: %s",
|
||||||
args.ProjectRoot, args.TestPath, s.resolveModel(args.Model), args.TestCmd,
|
args.ProjectRoot, args.TestPath, s.resolveModel(args.Model), args.TestCmd,
|
||||||
)
|
)
|
||||||
task = s.prependHistory(args.SessionID, "green", task)
|
task = session.PrependHistory(s.cfg.SessionsDir, args.SessionID, "green", task)
|
||||||
return s.execute(ctx, task)
|
|
||||||
|
t0 := time.Now()
|
||||||
|
result, err := s.complete(ctx, s.resolveModel(args.Model), task)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
s.logEntry(args.SessionID, args.ProjectRoot, "tdd", "green", s.resolveModel(args.Model), t0, result)
|
||||||
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type refactorArgs struct {
|
type refactorArgs struct {
|
||||||
@@ -101,23 +114,15 @@ func (s *Skill) handleRefactor(ctx context.Context, raw json.RawMessage) (json.R
|
|||||||
"phase: refactor\nproject_root: %s\ntest_path: %s\nimpl_path: %s\nmodel: %s\ntest_cmd: %s",
|
"phase: refactor\nproject_root: %s\ntest_path: %s\nimpl_path: %s\nmodel: %s\ntest_cmd: %s",
|
||||||
args.ProjectRoot, args.TestPath, args.ImplPath, s.resolveModel(args.Model), args.TestCmd,
|
args.ProjectRoot, args.TestPath, args.ImplPath, s.resolveModel(args.Model), args.TestCmd,
|
||||||
)
|
)
|
||||||
task = s.prependHistory(args.SessionID, "refactor", task)
|
task = session.PrependHistory(s.cfg.SessionsDir, args.SessionID, "refactor", task)
|
||||||
return s.execute(ctx, task)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
|
t0 := time.Now()
|
||||||
if sessionID == "" || s.cfg.SessionsDir == "" {
|
result, err := s.complete(ctx, s.resolveModel(args.Model), task)
|
||||||
return task
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
entries, err := session.Read(s.cfg.SessionsDir, sessionID)
|
s.logEntry(args.SessionID, args.ProjectRoot, "tdd", "refactor", s.resolveModel(args.Model), t0, result)
|
||||||
if err != nil || len(entries) == 0 {
|
return result, nil
|
||||||
return task
|
|
||||||
}
|
|
||||||
history := session.FormatHistory(entries, currentPhase)
|
|
||||||
if history == "" {
|
|
||||||
return task
|
|
||||||
}
|
|
||||||
return history + "\n---\n\n" + task
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Skill) resolveModel(override string) string {
|
func (s *Skill) resolveModel(override string) string {
|
||||||
@@ -127,17 +132,42 @@ func (s *Skill) resolveModel(override string) string {
|
|||||||
return s.cfg.DefaultModel
|
return s.cfg.DefaultModel
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Skill) execute(ctx context.Context, task string) (json.RawMessage, error) {
|
// complete calls CompleteFunc and returns the text as JSON.
|
||||||
if s.cfg.ExecutorFn == nil {
|
func (s *Skill) complete(ctx context.Context, model, task string) (json.RawMessage, error) {
|
||||||
|
if s.cfg.CompleteFunc == nil {
|
||||||
return nil, fmt.Errorf("no executor configured")
|
return nil, fmt.Errorf("no executor configured")
|
||||||
}
|
}
|
||||||
req := iexec.Request{
|
text, dur, err := s.cfg.CompleteFunc(ctx, model, s.cfg.SkillPrompt, task)
|
||||||
SkillPrompt: s.cfg.SkillPrompt,
|
|
||||||
TaskPrompt: task,
|
|
||||||
}
|
|
||||||
result, err := s.cfg.ExecutorFn(ctx, req)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return json.Marshal(result)
|
return json.Marshal(map[string]any{"text": text, "model": model, "duration_ms": dur})
|
||||||
|
}
|
||||||
|
|
||||||
|
// logEntry writes a session.Entry for a completed phase if session_id is set.
|
||||||
|
func (s *Skill) logEntry(sessionID, projectRoot, skill, phase, model string, t0 time.Time, raw json.RawMessage) {
|
||||||
|
if sessionID == "" || s.cfg.SessionsDir == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var msg string
|
||||||
|
var result struct {
|
||||||
|
Text string `json:"text"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(raw, &result); err == nil && len(result.Text) > 0 {
|
||||||
|
msg = result.Text
|
||||||
|
if len(msg) > 200 {
|
||||||
|
msg = msg[:200]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ = session.Append(s.cfg.SessionsDir, sessionID, session.Entry{
|
||||||
|
SessionID: sessionID,
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
Skill: skill,
|
||||||
|
Phase: phase,
|
||||||
|
ProjectRoot: projectRoot,
|
||||||
|
FinalStatus: "ok",
|
||||||
|
ModelUsed: model,
|
||||||
|
DurationMs: time.Since(t0).Milliseconds(),
|
||||||
|
Message: msg,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/session"
|
"github.com/mathiasbq/supervisor/internal/session"
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/tdd"
|
"github.com/mathiasbq/supervisor/internal/skills/tdd"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
@@ -14,7 +13,6 @@ import (
|
|||||||
|
|
||||||
func TestTDDSkillTools(t *testing.T) {
|
func TestTDDSkillTools(t *testing.T) {
|
||||||
skill := tdd.New(tdd.Config{
|
skill := tdd.New(tdd.Config{
|
||||||
SystemPrompt: "supervisor rules",
|
|
||||||
SkillPrompt: "tdd rules",
|
SkillPrompt: "tdd rules",
|
||||||
})
|
})
|
||||||
tools := skill.Tools()
|
tools := skill.Tools()
|
||||||
@@ -26,19 +24,19 @@ func TestTDDSkillTools(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestTDDSkillHandleUnknown(t *testing.T) {
|
func TestTDDSkillHandleUnknown(t *testing.T) {
|
||||||
skill := tdd.New(tdd.Config{SystemPrompt: "s", SkillPrompt: "t"})
|
skill := tdd.New(tdd.Config{SkillPrompt: "t"})
|
||||||
_, err := skill.Handle(context.Background(), "tdd_unknown", json.RawMessage(`{}`))
|
_, err := skill.Handle(context.Background(), "tdd_unknown", json.RawMessage(`{}`))
|
||||||
assert.ErrorContains(t, err, "unknown tool")
|
assert.ErrorContains(t, err, "unknown tool")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestTDDRedRequiresProjectRoot(t *testing.T) {
|
func TestTDDRedRequiresProjectRoot(t *testing.T) {
|
||||||
skill := tdd.New(tdd.Config{SystemPrompt: "s", SkillPrompt: "t"})
|
skill := tdd.New(tdd.Config{SkillPrompt: "t"})
|
||||||
_, err := skill.Handle(context.Background(), "tdd_red", json.RawMessage(`{"spec":"add two numbers"}`))
|
_, err := skill.Handle(context.Background(), "tdd_red", json.RawMessage(`{"spec":"add two numbers"}`))
|
||||||
assert.ErrorContains(t, err, "project_root")
|
assert.ErrorContains(t, err, "project_root")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestTDDRedRequiresSpec(t *testing.T) {
|
func TestTDDRedRequiresSpec(t *testing.T) {
|
||||||
skill := tdd.New(tdd.Config{SystemPrompt: "s", SkillPrompt: "t"})
|
skill := tdd.New(tdd.Config{SkillPrompt: "t"})
|
||||||
_, err := skill.Handle(context.Background(), "tdd_red", json.RawMessage(`{"project_root":"/tmp/proj"}`))
|
_, err := skill.Handle(context.Background(), "tdd_red", json.RawMessage(`{"project_root":"/tmp/proj"}`))
|
||||||
assert.ErrorContains(t, err, "spec")
|
assert.ErrorContains(t, err, "spec")
|
||||||
}
|
}
|
||||||
@@ -51,35 +49,49 @@ func TestTDDGreenInjectsSessionHistory(t *testing.T) {
|
|||||||
Message: "wrote failing test for Foo",
|
Message: "wrote failing test for Foo",
|
||||||
}))
|
}))
|
||||||
|
|
||||||
var capturedPrompt string
|
var capturedTask string
|
||||||
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
|
||||||
capturedPrompt = req.TaskPrompt
|
capturedTask = user
|
||||||
return iexec.Result{Status: "pass", Phase: "green", Skill: "tdd", Verified: true, ModelUsed: "self", Message: "ok"}, nil
|
return "here is my suggestion", 100, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", ExecutorFn: fakeFn, SessionsDir: sessDir})
|
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", CompleteFunc: fakeFn, SessionsDir: sessDir})
|
||||||
_, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
|
_, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
|
||||||
`{"project_root":"/tmp","test_path":"internal/foo/foo_test.go","test_cmd":"go test ./...","session_id":"sess-1"}`,
|
`{"project_root":"/tmp","test_path":"internal/foo/foo_test.go","test_cmd":"go test ./...","session_id":"sess-1"}`,
|
||||||
))
|
))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.Contains(t, capturedPrompt, "## Session history")
|
assert.Contains(t, capturedTask, "## Session history")
|
||||||
assert.Contains(t, capturedPrompt, "wrote failing test for Foo")
|
assert.Contains(t, capturedTask, "wrote failing test for Foo")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestTDDGreenNoHistoryWhenSessionIDEmpty(t *testing.T) {
|
func TestTDDGreenNoHistoryWhenSessionIDEmpty(t *testing.T) {
|
||||||
var capturedPrompt string
|
var capturedTask string
|
||||||
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
|
||||||
capturedPrompt = req.TaskPrompt
|
capturedTask = user
|
||||||
return iexec.Result{Status: "pass", Phase: "green", Skill: "tdd", Verified: true, ModelUsed: "self", Message: "ok"}, nil
|
return "suggestion", 50, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", ExecutorFn: fakeFn, SessionsDir: t.TempDir()})
|
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", CompleteFunc: fakeFn, SessionsDir: t.TempDir()})
|
||||||
_, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
|
_, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
|
||||||
`{"project_root":"/tmp","test_path":"internal/foo/foo_test.go"}`,
|
`{"project_root":"/tmp","test_path":"internal/foo/foo_test.go"}`,
|
||||||
))
|
))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.NotContains(t, capturedPrompt, "## Session history")
|
assert.NotContains(t, capturedTask, "## Session history")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure require is used (avoids import error).
|
func TestTDDGreenReturnsTextJSON(t *testing.T) {
|
||||||
var _ = require.New
|
fakeFn := func(_ context.Context, _, _, _ string) (string, int64, error) {
|
||||||
|
return "write a func that adds two ints", 42, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", CompleteFunc: fakeFn})
|
||||||
|
raw, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
|
||||||
|
`{"project_root":"/tmp","test_path":"foo_test.go"}`,
|
||||||
|
))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
var result map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(raw, &result))
|
||||||
|
assert.Equal(t, "write a func that adds two ints", result["text"])
|
||||||
|
assert.Equal(t, float64(42), result["duration_ms"])
|
||||||
|
}
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user