commit 7dfe8a792e6b26e42115f434710143b3ccd23216 Author: Mathias Bergqvist Date: Tue May 19 23:02:07 2026 +0200 feat: initial scaffold with context adapters and litellm pkg Co-Authored-By: Claude Sonnet 4.6 diff --git a/.aider.conf.yml b/.aider.conf.yml new file mode 100644 index 0000000..a16f762 --- /dev/null +++ b/.aider.conf.yml @@ -0,0 +1,2 @@ +read: .aider.conventions.md +auto-commits: false diff --git a/.aider.conventions.md b/.aider.conventions.md new file mode 100644 index 0000000..42ccd32 --- /dev/null +++ b/.aider.conventions.md @@ -0,0 +1,255 @@ +# Agent context — Mathias workspace + + + +## Who I am + +I'm Mathias, a digital product manager and technology consultant based in Sweden. +I build software, research emerging tech, and deliver consulting engagements +for clients under NDA. I work across AI/ML, financial automation, web applications, +and climate/sustainability tech. + +## How I work with agents + +- I think like a product manager — I care about *why* before *how* +- I want agents to be opinionated and push back, not just execute blindly +- I prefer concise responses; skip ceremony and get to the point +- When I say "build this", I mean production-quality with tests, not a demo +- Ask me before making irreversible changes or adding heavy dependencies +- I work with confidential client data — never send it to cloud APIs unless I explicitly say it's OK + +## Behavior rules + +These rules apply to every task across every project, regardless of harness. + +1. **No assumptions.** Don't hide confusion — surface it. Surface tradeoffs explicitly. + Think before coding; if the problem is unclear, ask or state assumptions before acting. +2. **Minimum viable code.** Solve with the smallest change that works. Nothing + speculative, no "while we're here" cleanups, no premature abstractions. Simplicity first. +3. **Surgical changes.** Touch only what the task requires. Leave unrelated code, + files, and formatting alone. Diffs should be small and reviewable. 
+4. **Goal-driven execution.** Define clear success criteria up front for every task. + Loop — implement, verify, refine — until those criteria are met. Don't claim + completion without evidence (tests pass, command output, observed behavior). +5. **Trunk-Based Development — commit directly to main.** Every commit is one + logical change (one tool, one fix, one test) with passing tests. Main is always + deployable. Never create long-lived feature branches. + + **Exception — parallel agents on same repo:** If another agent is known to be + actively working on the same repo simultaneously, create a short-lived branch + (`agent/{task}`), finish the task, and merge to main within the same + session. Do not leave agent branches open between sessions. + + **Exception — external contributor or client four-eyes requirement:** Use + PR flow only when a human reviewer outside the project is required. Document + the reason in PROJECT.md. + +## Default stack + +| Layer | Default | Fallback | Last resort | +|-------|---------|----------|-------------| +| Language | Go | Python | TypeScript, Java, C | +| UI | HTMX + Templ | Server-rendered HTML | React (only if SPA is justified) | +| Build | Task (taskfile.dev) | Make | — | +| Containers | Docker Compose (dev), k3s (prod) | — | — | +| DB | PostgreSQL + sqlc | SQLite | — | +| Search | pgvector (vector), BM25 | Qdrant (when >1M vectors or hybrid retrieval) | — | +| Logging | slog (structured) | — | — | +| Testing | Table-driven, testify | — | — | +| Agents (Go) | google.golang.org/adk + pkg/litellm adapter | — | — | + +Exploratory: Rust, Zig — I'll tell you when I want these. 
+ +## Code conventions + +- **Go style**: golines, gofumpt, golangci-lint +- **Errors**: `fmt.Errorf("operation: %w", err)` — never naked, never log-and-return +- **Naming**: stdlib conventions, no stuttering +- **Architecture**: prefer stdlib over frameworks, constructor injection, env-var config parsed into typed structs +- **Git**: conventional commits (`feat:`, `fix:`, `chore:`), commit directly to main, + one logical change per commit, CI is the quality gate +- **Never**: long-lived feature branches, PRs for solo work, direct push without + passing `task check` locally first +- **Security**: no secrets in code, govulncheck before adding deps, SOPS for encrypted config +- **Dependencies**: prefer stdlib. testify, slog, templ, sqlc, google.golang.org/adk (agent projects only) are pre-approved; anything else needs justification in the commit message + +## Infrastructure + +Three machines on Tailscale: + +| Machine | Role | Key specs | +|---------|------|-----------| +| koala | GPU inference, heavy compute | RTX 5070, runs k3s + llama-swap + shared postgres18/pgvector | +| iguana | Services, builds | M2 Ultra Mac | +| flamingo | Daily driver, edge | Mac mini, ~/dev is here | + +- **Model routing**: LiteLLM in front of llama-swap (local) + cloud APIs (when permitted) +- **Orchestration**: k3s cluster across all three machines +- **Networking**: Tailscale mesh + +## Project landscape + +All development repos live at `~/dev/` (softlink from `~/Documents/local-dev/`). 
+ +Organized in thematic folders: + +| Folder | Focus | Count | +|--------|-------|-------| +| `GO/` | Go web frameworks, API integrations, learning projects | ~10 | +| `AI/` | ML research, AI frameworks (FinRL, DSPy, crawl4ai) | ~6 | +| `AGENTS/` | Autonomous agents, coding agents, MCP servers, infra | ~15 | +| `QKX/` | Invoice processing, financial automation, payment systems | ~13 | +| `XT/` | Climate data, sustainability (Klimatkollen, Garbo) | ~2 | + +See `~/dev/PROJECT_SUMMARY.md` for detailed descriptions of each project. + +### Key active projects + +- **super-koala** (`AGENTS/`) — multi-component agent stack with LangGraph, DSPy, MCP +- **azure-tiger** (`QKX/`) — invoice extraction → ISO 20022 payment instructions +- **gocrwl** (`AGENTS/`) — Go web crawler with containerized deployment +- **koala-ai-stack** (`AGENTS/`) — local AI server infrastructure management +- **klimatkollen** (`XT/`) — Swedish municipal climate data platform + +## Knowledge base — actively use it + +A persistent brain (BM25 search + LLM-synthesised Q&A) survives across sessions, +hosts, and harnesses. It holds 100+ hard-won entries: infra incident postmortems, +Go pitfalls, framework gotchas, design principles, ADRs. **It is not optional +reference material — query it actively, not just when explicitly told.** + +### When to query (treat as a reflex) + +- **Before** starting a non-trivial task — search for prior art with the symptom + AND the system component ("how did we solve X in Y?"). 5 seconds beats 5 hours. +- **When debugging** — search for the error string, the stack frame, the affected + service. Past you may have already paid this tax. +- **Before adopting** a pattern, library, framework, or model name — check if it + was tried and rejected, or what the integration footguns are. +- **When making architectural decisions** — search for the domain + "ADR" or + "decision" to find prior reasoning before re-deriving it. 
+- **When a recommendation feels novel** — challenge yourself: "has this been + documented?" The brain often has it. + +### When to write + +After you discover something that **future-you would forget** and that **isn't +recoverable from the code, git log, or PR description alone**: + +- Bugs whose root cause is non-obvious and generalisable beyond this project. +- Framework / library / model-name quirks that bit you and would bite anyone. +- Design principles validated under fire (e.g. "every `_get` needs a `_list`"). +- Postmortems for incidents: what broke, why, how diagnosed, what to do next time. + +DON'T write project status, sprint progress, PR summaries, or "what I did this +session" — those rot fast and the originals are in git/gitea anyway. Brain +entries that age well are about *why*, *how to avoid*, and *what to do when*. + +### How to access (per harness) + +| Harness | Query | Write | +|---------|-------|-------| +| **Claude Code, Claude Desktop** | `brain_query` (BM25), `brain_answer` (LLM-synth + sources) MCP tools | `brain_write` MCP tool | +| **Crush, Pi, Antigravity, other MCP-capable** | same MCP server: `ingestion-brain` (via the `mcp__*_brain__*` namespace once authenticated) | same | +| **Anything HTTP-only (curl, scripts)** | `POST https://brain-mcp.d-ma.be/query` with `{"query":"..."}` (auth via `BRAIN_MCP_TOKEN`) | `POST .../write` with `{"content":"...","filename":"..."}` | +| **Browser / human inspection** | `https://gitea.d-ma.be/mathias/hyperguild` → `knowledge/` and `wiki/` markdown files | — | + +- **Scoping**: defaults to `public` collection; client projects filter to `{client}` + `public`. +- **Routing**: brain_answer's LLM uses berget.ai as primary, iguana ollama as + fallback. Both are configurable in the `supervisor/ingestion-deployment.yaml` + on the koala k3s cluster; don't hardcode local-only model names into the + berget URL (see knowledge entry on namespace mismatches). 
+ +### Quick reflex checks + +If you find yourself about to say any of these out loud, you owe yourself a brain query first: + +- "I think the issue might be..." +- "Let me try X and see..." +- "I'll just write a script to..." +- "This is probably a new bug..." +- "Has anyone done this before?" — *yes, probably, go check.* + +## Client work rules + +When working on a project tagged with a client name: +1. Never send code, data, or context to cloud APIs — use local models only +2. Never reference other client projects or their data +3. Keep all artifacts within the client's git org / directory +4. Treat everything as confidential unless told otherwise + +## Harness-agnostic principles + +This context is designed to work with any AI coding tool: +- Claude Code, Cursor, Aider, Open WebUI, Charmbracelet Mods/Crush +- Pi Coding Agent, Mistral Vibe, Antigravity +- Any tool that accepts a system prompt or reads a markdown context file + +The canonical source is always `.context/AGENT.md` (root) and `.context/PROJECT.md` (per-project). +Derived files are committed (see *How context propagates* below) so a `git pull` on any host yields full agent context with no setup. + +## How context propagates + +Canonical sources of truth: +- Universal: `~/dev/.context/AGENT.md` (this file) +- Project: `{repo}/.context/PROJECT.md` (per-repo) + +Derived files (committed, regenerated by `task context:sync`): +- `CLAUDE.md`, `AGENTS.md`, `.cursorrules`, `.aider.conventions.md`, + `.context/system-prompt.txt` + +Workflow: +1. Edit a canonical file. Run `task context:sync`. Commit canonical and + derived together. Push. +2. On any other host, `git pull` brings both. Claude Code (tree-walking) + uses `CLAUDE.md`; Crush / Pi / Antigravity (cwd-only) use `AGENTS.md`; + Cursor uses `.cursorrules`; Aider uses `.aider.conventions.md`. +3. 
`task check` runs `context:sync` then asserts `git status --porcelain` + is empty over the derived files (catches both modified-tracked drift + and missing-untracked adapters). A drift fails the check with a + message telling you to stage the regenerated files. + +Behavior rules in this file and per-project rules in `PROJECT.md` apply +unconditionally on every host, every harness. + +## Engineering Skills + +Shared engineering skills are available in `~/dev/.skills/`. Load on demand via the index. + +See `~/dev/.skills/SKILLS_INDEX.md` for the full list with descriptions and "use when" triggers. + +Key skills: +- **TDD**: always write tests first — load `tdd` skill +- **Code Review**: load `code-review` skill before any review +- **SOLID/Clean Code**: load `solid` or `clean-code` skill for design work +- **Problem first**: load `problem-analysis` skill before coding non-trivial features + +--- + +# __PROJECT_NAME__ + +## Identity + +- **Name**: __PROJECT_NAME__ +- **Owner**: Mathias +- **Client**: personal +- **Repo**: gitea.d-ma.be/mathias/__PROJECT_NAME__ +- **Status**: active + +## Stack + +Go + ADK + LiteLLM. See `~/dev/.context/AGENT.md` for cross-project conventions. + +## Agent + +TODO: describe what this agent does, what tools it has, and what it's responsible for. + +## Observability + +Traces → Jaeger via `OTLP_ENDPOINT`. Set `ADK_SERVICE_NAME=__PROJECT_NAME__` per deployment. +Spans emitted: `invoke_agent`, `generate_content`. Tool spans require custom callbacks. diff --git a/.context/PROJECT.md b/.context/PROJECT.md new file mode 100644 index 0000000..b69a2a8 --- /dev/null +++ b/.context/PROJECT.md @@ -0,0 +1,22 @@ +# __PROJECT_NAME__ + +## Identity + +- **Name**: __PROJECT_NAME__ +- **Owner**: Mathias +- **Client**: personal +- **Repo**: gitea.d-ma.be/mathias/__PROJECT_NAME__ +- **Status**: active + +## Stack + +Go + ADK + LiteLLM. See `~/dev/.context/AGENT.md` for cross-project conventions. 
+ +## Agent + +TODO: describe what this agent does, what tools it has, and what it's responsible for. + +## Observability + +Traces → Jaeger via `OTLP_ENDPOINT`. Set `ADK_SERVICE_NAME=__PROJECT_NAME__` per deployment. +Spans emitted: `invoke_agent`, `generate_content`. Tool spans require custom callbacks. diff --git a/.context/mcp.json b/.context/mcp.json new file mode 100644 index 0000000..c9514c5 --- /dev/null +++ b/.context/mcp.json @@ -0,0 +1,26 @@ +{ + "mcpServers": { + "knowledge": { + "url": "http://localhost:3100/mcp", + "description": "Project knowledge base — vector + graph retrieval" + }, + "brain": { + "type": "http", + "url": "https://brain-mcp.d-ma.be/mcp", + "headers": { + "Authorization": "Bearer ${BRAIN_MCP_TOKEN}" + } + }, + "gitea": { + "type": "http", + "url": "https://git-mcp.d-ma.be/mcp", + "headers": { + "Authorization": "Bearer ${GITEA_MCP_TOKEN}" + } + }, + "infra": { + "type": "http", + "url": "https://infra-mcp.d-ma.be/mcp" + } + } +} diff --git a/.context/system-prompt.txt b/.context/system-prompt.txt new file mode 100644 index 0000000..5dff007 --- /dev/null +++ b/.context/system-prompt.txt @@ -0,0 +1,262 @@ +You are a coding assistant working on a specific project. +Follow all conventions from both the root agent context and project context. + +--- + +# Agent context — Mathias workspace + + + +## Who I am + +I'm Mathias, a digital product manager and technology consultant based in Sweden. +I build software, research emerging tech, and deliver consulting engagements +for clients under NDA. I work across AI/ML, financial automation, web applications, +and climate/sustainability tech. 
+ +## How I work with agents + +- I think like a product manager — I care about *why* before *how* +- I want agents to be opinionated and push back, not just execute blindly +- I prefer concise responses; skip ceremony and get to the point +- When I say "build this", I mean production-quality with tests, not a demo +- Ask me before making irreversible changes or adding heavy dependencies +- I work with confidential client data — never send it to cloud APIs unless I explicitly say it's OK + +## Behavior rules + +These rules apply to every task across every project, regardless of harness. + +1. **No assumptions.** Don't hide confusion — surface it. Surface tradeoffs explicitly. + Think before coding; if the problem is unclear, ask or state assumptions before acting. +2. **Minimum viable code.** Solve with the smallest change that works. Nothing + speculative, no "while we're here" cleanups, no premature abstractions. Simplicity first. +3. **Surgical changes.** Touch only what the task requires. Leave unrelated code, + files, and formatting alone. Diffs should be small and reviewable. +4. **Goal-driven execution.** Define clear success criteria up front for every task. + Loop — implement, verify, refine — until those criteria are met. Don't claim + completion without evidence (tests pass, command output, observed behavior). +5. **Trunk-Based Development — commit directly to main.** Every commit is one + logical change (one tool, one fix, one test) with passing tests. Main is always + deployable. Never create long-lived feature branches. + + **Exception — parallel agents on same repo:** If another agent is known to be + actively working on the same repo simultaneously, create a short-lived branch + (`agent/{task}`), finish the task, and merge to main within the same + session. Do not leave agent branches open between sessions. + + **Exception — external contributor or client four-eyes requirement:** Use + PR flow only when a human reviewer outside the project is required. 
Document + the reason in PROJECT.md. + +## Default stack + +| Layer | Default | Fallback | Last resort | +|-------|---------|----------|-------------| +| Language | Go | Python | TypeScript, Java, C | +| UI | HTMX + Templ | Server-rendered HTML | React (only if SPA is justified) | +| Build | Task (taskfile.dev) | Make | — | +| Containers | Docker Compose (dev), k3s (prod) | — | — | +| DB | PostgreSQL + sqlc | SQLite | — | +| Search | pgvector (vector), BM25 | Qdrant (when >1M vectors or hybrid retrieval) | — | +| Logging | slog (structured) | — | — | +| Testing | Table-driven, testify | — | — | +| Agents (Go) | google.golang.org/adk + pkg/litellm adapter | — | — | + +Exploratory: Rust, Zig — I'll tell you when I want these. + +## Code conventions + +- **Go style**: golines, gofumpt, golangci-lint +- **Errors**: `fmt.Errorf("operation: %w", err)` — never naked, never log-and-return +- **Naming**: stdlib conventions, no stuttering +- **Architecture**: prefer stdlib over frameworks, constructor injection, env-var config parsed into typed structs +- **Git**: conventional commits (`feat:`, `fix:`, `chore:`), commit directly to main, + one logical change per commit, CI is the quality gate +- **Never**: long-lived feature branches, PRs for solo work, direct push without + passing `task check` locally first +- **Security**: no secrets in code, govulncheck before adding deps, SOPS for encrypted config +- **Dependencies**: prefer stdlib. 
testify, slog, templ, sqlc, google.golang.org/adk (agent projects only) are pre-approved; anything else needs justification in the commit message + +## Infrastructure + +Three machines on Tailscale: + +| Machine | Role | Key specs | +|---------|------|-----------| +| koala | GPU inference, heavy compute | RTX 5070, runs k3s + llama-swap + shared postgres18/pgvector | +| iguana | Services, builds | M2 Ultra Mac | +| flamingo | Daily driver, edge | Mac mini, ~/dev is here | + +- **Model routing**: LiteLLM in front of llama-swap (local) + cloud APIs (when permitted) +- **Orchestration**: k3s cluster across all three machines +- **Networking**: Tailscale mesh + +## Project landscape + +All development repos live at `~/dev/` (softlink from `~/Documents/local-dev/`). + +Organized in thematic folders: + +| Folder | Focus | Count | +|--------|-------|-------| +| `GO/` | Go web frameworks, API integrations, learning projects | ~10 | +| `AI/` | ML research, AI frameworks (FinRL, DSPy, crawl4ai) | ~6 | +| `AGENTS/` | Autonomous agents, coding agents, MCP servers, infra | ~15 | +| `QKX/` | Invoice processing, financial automation, payment systems | ~13 | +| `XT/` | Climate data, sustainability (Klimatkollen, Garbo) | ~2 | + +See `~/dev/PROJECT_SUMMARY.md` for detailed descriptions of each project. + +### Key active projects + +- **super-koala** (`AGENTS/`) — multi-component agent stack with LangGraph, DSPy, MCP +- **azure-tiger** (`QKX/`) — invoice extraction → ISO 20022 payment instructions +- **gocrwl** (`AGENTS/`) — Go web crawler with containerized deployment +- **koala-ai-stack** (`AGENTS/`) — local AI server infrastructure management +- **klimatkollen** (`XT/`) — Swedish municipal climate data platform + +## Knowledge base — actively use it + +A persistent brain (BM25 search + LLM-synthesised Q&A) survives across sessions, +hosts, and harnesses. It holds 100+ hard-won entries: infra incident postmortems, +Go pitfalls, framework gotchas, design principles, ADRs. 
**It is not optional +reference material — query it actively, not just when explicitly told.** + +### When to query (treat as a reflex) + +- **Before** starting a non-trivial task — search for prior art with the symptom + AND the system component ("how did we solve X in Y?"). 5 seconds beats 5 hours. +- **When debugging** — search for the error string, the stack frame, the affected + service. Past you may have already paid this tax. +- **Before adopting** a pattern, library, framework, or model name — check if it + was tried and rejected, or what the integration footguns are. +- **When making architectural decisions** — search for the domain + "ADR" or + "decision" to find prior reasoning before re-deriving it. +- **When a recommendation feels novel** — challenge yourself: "has this been + documented?" The brain often has it. + +### When to write + +After you discover something that **future-you would forget** and that **isn't +recoverable from the code, git log, or PR description alone**: + +- Bugs whose root cause is non-obvious and generalisable beyond this project. +- Framework / library / model-name quirks that bit you and would bite anyone. +- Design principles validated under fire (e.g. "every `_get` needs a `_list`"). +- Postmortems for incidents: what broke, why, how diagnosed, what to do next time. + +DON'T write project status, sprint progress, PR summaries, or "what I did this +session" — those rot fast and the originals are in git/gitea anyway. Brain +entries that age well are about *why*, *how to avoid*, and *what to do when*. 
+ +### How to access (per harness) + +| Harness | Query | Write | +|---------|-------|-------| +| **Claude Code, Claude Desktop** | `brain_query` (BM25), `brain_answer` (LLM-synth + sources) MCP tools | `brain_write` MCP tool | +| **Crush, Pi, Antigravity, other MCP-capable** | same MCP server: `ingestion-brain` (via the `mcp__*_brain__*` namespace once authenticated) | same | +| **Anything HTTP-only (curl, scripts)** | `POST https://brain-mcp.d-ma.be/query` with `{"query":"..."}` (auth via `BRAIN_MCP_TOKEN`) | `POST .../write` with `{"content":"...","filename":"..."}` | +| **Browser / human inspection** | `https://gitea.d-ma.be/mathias/hyperguild` → `knowledge/` and `wiki/` markdown files | — | + +- **Scoping**: defaults to `public` collection; client projects filter to `{client}` + `public`. +- **Routing**: brain_answer's LLM uses berget.ai as primary, iguana ollama as + fallback. Both are configurable in the `supervisor/ingestion-deployment.yaml` + on the koala k3s cluster; don't hardcode local-only model names into the + berget URL (see knowledge entry on namespace mismatches). + +### Quick reflex checks + +If you find yourself about to say any of these out loud, you owe yourself a brain query first: + +- "I think the issue might be..." +- "Let me try X and see..." +- "I'll just write a script to..." +- "This is probably a new bug..." +- "Has anyone done this before?" — *yes, probably, go check.* + +## Client work rules + +When working on a project tagged with a client name: +1. Never send code, data, or context to cloud APIs — use local models only +2. Never reference other client projects or their data +3. Keep all artifacts within the client's git org / directory +4. 
Treat everything as confidential unless told otherwise + +## Harness-agnostic principles + +This context is designed to work with any AI coding tool: +- Claude Code, Cursor, Aider, Open WebUI, Charmbracelet Mods/Crush +- Pi Coding Agent, Mistral Vibe, Antigravity +- Any tool that accepts a system prompt or reads a markdown context file + +The canonical source is always `.context/AGENT.md` (root) and `.context/PROJECT.md` (per-project). +Derived files are committed (see *How context propagates* below) so a `git pull` on any host yields full agent context with no setup. + +## How context propagates + +Canonical sources of truth: +- Universal: `~/dev/.context/AGENT.md` (this file) +- Project: `{repo}/.context/PROJECT.md` (per-repo) + +Derived files (committed, regenerated by `task context:sync`): +- `CLAUDE.md`, `AGENTS.md`, `.cursorrules`, `.aider.conventions.md`, + `.context/system-prompt.txt` + +Workflow: +1. Edit a canonical file. Run `task context:sync`. Commit canonical and + derived together. Push. +2. On any other host, `git pull` brings both. Claude Code (tree-walking) + uses `CLAUDE.md`; Crush / Pi / Antigravity (cwd-only) use `AGENTS.md`; + Cursor uses `.cursorrules`; Aider uses `.aider.conventions.md`. +3. `task check` runs `context:sync` then asserts `git status --porcelain` + is empty over the derived files (catches both modified-tracked drift + and missing-untracked adapters). A drift fails the check with a + message telling you to stage the regenerated files. + +Behavior rules in this file and per-project rules in `PROJECT.md` apply +unconditionally on every host, every harness. + +## Engineering Skills + +Shared engineering skills are available in `~/dev/.skills/`. Load on demand via the index. + +See `~/dev/.skills/SKILLS_INDEX.md` for the full list with descriptions and "use when" triggers. 
+ +Key skills: +- **TDD**: always write tests first — load `tdd` skill +- **Code Review**: load `code-review` skill before any review +- **SOLID/Clean Code**: load `solid` or `clean-code` skill for design work +- **Problem first**: load `problem-analysis` skill before coding non-trivial features + +--- + +# __PROJECT_NAME__ + +## Identity + +- **Name**: __PROJECT_NAME__ +- **Owner**: Mathias +- **Client**: personal +- **Repo**: gitea.d-ma.be/mathias/__PROJECT_NAME__ +- **Status**: active + +## Stack + +Go + ADK + LiteLLM. See `~/dev/.context/AGENT.md` for cross-project conventions. + +## Agent + +TODO: describe what this agent does, what tools it has, and what it's responsible for. + +## Observability + +Traces → Jaeger via `OTLP_ENDPOINT`. Set `ADK_SERVICE_NAME=__PROJECT_NAME__` per deployment. +Spans emitted: `invoke_agent`, `generate_content`. Tool spans require custom callbacks. + +--- diff --git a/.cursorrules b/.cursorrules new file mode 100644 index 0000000..ec680d6 --- /dev/null +++ b/.cursorrules @@ -0,0 +1,258 @@ +# Cursor rules — auto-generated +# Do not edit. Run: task context:sync + +# Agent context — Mathias workspace + + + +## Who I am + +I'm Mathias, a digital product manager and technology consultant based in Sweden. +I build software, research emerging tech, and deliver consulting engagements +for clients under NDA. I work across AI/ML, financial automation, web applications, +and climate/sustainability tech. 
+ +## How I work with agents + +- I think like a product manager — I care about *why* before *how* +- I want agents to be opinionated and push back, not just execute blindly +- I prefer concise responses; skip ceremony and get to the point +- When I say "build this", I mean production-quality with tests, not a demo +- Ask me before making irreversible changes or adding heavy dependencies +- I work with confidential client data — never send it to cloud APIs unless I explicitly say it's OK + +## Behavior rules + +These rules apply to every task across every project, regardless of harness. + +1. **No assumptions.** Don't hide confusion — surface it. Surface tradeoffs explicitly. + Think before coding; if the problem is unclear, ask or state assumptions before acting. +2. **Minimum viable code.** Solve with the smallest change that works. Nothing + speculative, no "while we're here" cleanups, no premature abstractions. Simplicity first. +3. **Surgical changes.** Touch only what the task requires. Leave unrelated code, + files, and formatting alone. Diffs should be small and reviewable. +4. **Goal-driven execution.** Define clear success criteria up front for every task. + Loop — implement, verify, refine — until those criteria are met. Don't claim + completion without evidence (tests pass, command output, observed behavior). +5. **Trunk-Based Development — commit directly to main.** Every commit is one + logical change (one tool, one fix, one test) with passing tests. Main is always + deployable. Never create long-lived feature branches. + + **Exception — parallel agents on same repo:** If another agent is known to be + actively working on the same repo simultaneously, create a short-lived branch + (`agent/{task}`), finish the task, and merge to main within the same + session. Do not leave agent branches open between sessions. + + **Exception — external contributor or client four-eyes requirement:** Use + PR flow only when a human reviewer outside the project is required. 
Document + the reason in PROJECT.md. + +## Default stack + +| Layer | Default | Fallback | Last resort | +|-------|---------|----------|-------------| +| Language | Go | Python | TypeScript, Java, C | +| UI | HTMX + Templ | Server-rendered HTML | React (only if SPA is justified) | +| Build | Task (taskfile.dev) | Make | — | +| Containers | Docker Compose (dev), k3s (prod) | — | — | +| DB | PostgreSQL + sqlc | SQLite | — | +| Search | pgvector (vector), BM25 | Qdrant (when >1M vectors or hybrid retrieval) | — | +| Logging | slog (structured) | — | — | +| Testing | Table-driven, testify | — | — | +| Agents (Go) | google.golang.org/adk + pkg/litellm adapter | — | — | + +Exploratory: Rust, Zig — I'll tell you when I want these. + +## Code conventions + +- **Go style**: golines, gofumpt, golangci-lint +- **Errors**: `fmt.Errorf("operation: %w", err)` — never naked, never log-and-return +- **Naming**: stdlib conventions, no stuttering +- **Architecture**: prefer stdlib over frameworks, constructor injection, env-var config parsed into typed structs +- **Git**: conventional commits (`feat:`, `fix:`, `chore:`), commit directly to main, + one logical change per commit, CI is the quality gate +- **Never**: long-lived feature branches, PRs for solo work, direct push without + passing `task check` locally first +- **Security**: no secrets in code, govulncheck before adding deps, SOPS for encrypted config +- **Dependencies**: prefer stdlib. 
testify, slog, templ, sqlc, google.golang.org/adk (agent projects only) are pre-approved; anything else needs justification in the commit message + +## Infrastructure + +Three machines on Tailscale: + +| Machine | Role | Key specs | +|---------|------|-----------| +| koala | GPU inference, heavy compute | RTX 5070, runs k3s + llama-swap + shared postgres18/pgvector | +| iguana | Services, builds | M2 Ultra Mac | +| flamingo | Daily driver, edge | Mac mini, ~/dev is here | + +- **Model routing**: LiteLLM in front of llama-swap (local) + cloud APIs (when permitted) +- **Orchestration**: k3s cluster across all three machines +- **Networking**: Tailscale mesh + +## Project landscape + +All development repos live at `~/dev/` (softlink from `~/Documents/local-dev/`). + +Organized in thematic folders: + +| Folder | Focus | Count | +|--------|-------|-------| +| `GO/` | Go web frameworks, API integrations, learning projects | ~10 | +| `AI/` | ML research, AI frameworks (FinRL, DSPy, crawl4ai) | ~6 | +| `AGENTS/` | Autonomous agents, coding agents, MCP servers, infra | ~15 | +| `QKX/` | Invoice processing, financial automation, payment systems | ~13 | +| `XT/` | Climate data, sustainability (Klimatkollen, Garbo) | ~2 | + +See `~/dev/PROJECT_SUMMARY.md` for detailed descriptions of each project. + +### Key active projects + +- **super-koala** (`AGENTS/`) — multi-component agent stack with LangGraph, DSPy, MCP +- **azure-tiger** (`QKX/`) — invoice extraction → ISO 20022 payment instructions +- **gocrwl** (`AGENTS/`) — Go web crawler with containerized deployment +- **koala-ai-stack** (`AGENTS/`) — local AI server infrastructure management +- **klimatkollen** (`XT/`) — Swedish municipal climate data platform + +## Knowledge base — actively use it + +A persistent brain (BM25 search + LLM-synthesised Q&A) survives across sessions, +hosts, and harnesses. It holds 100+ hard-won entries: infra incident postmortems, +Go pitfalls, framework gotchas, design principles, ADRs. 
**It is not optional +reference material — query it actively, not just when explicitly told.** + +### When to query (treat as a reflex) + +- **Before** starting a non-trivial task — search for prior art with the symptom + AND the system component ("how did we solve X in Y?"). 5 seconds beats 5 hours. +- **When debugging** — search for the error string, the stack frame, the affected + service. Past you may have already paid this tax. +- **Before adopting** a pattern, library, framework, or model name — check if it + was tried and rejected, or what the integration footguns are. +- **When making architectural decisions** — search for the domain + "ADR" or + "decision" to find prior reasoning before re-deriving it. +- **When a recommendation feels novel** — challenge yourself: "has this been + documented?" The brain often has it. + +### When to write + +After you discover something that **future-you would forget** and that **isn't +recoverable from the code, git log, or PR description alone**: + +- Bugs whose root cause is non-obvious and generalisable beyond this project. +- Framework / library / model-name quirks that bit you and would bite anyone. +- Design principles validated under fire (e.g. "every `_get` needs a `_list`"). +- Postmortems for incidents: what broke, why, how diagnosed, what to do next time. + +DON'T write project status, sprint progress, PR summaries, or "what I did this +session" — those rot fast and the originals are in git/gitea anyway. Brain +entries that age well are about *why*, *how to avoid*, and *what to do when*. 
+ +### How to access (per harness) + +| Harness | Query | Write | +|---------|-------|-------| +| **Claude Code, Claude Desktop** | `brain_query` (BM25), `brain_answer` (LLM-synth + sources) MCP tools | `brain_write` MCP tool | +| **Crush, Pi, Antigravity, other MCP-capable** | same MCP server: `ingestion-brain` (via the `mcp__*_brain__*` namespace once authenticated) | same | +| **Anything HTTP-only (curl, scripts)** | `POST https://brain-mcp.d-ma.be/query` with `{"query":"..."}` (auth via `BRAIN_MCP_TOKEN`) | `POST .../write` with `{"content":"...","filename":"..."}` | +| **Browser / human inspection** | `https://gitea.d-ma.be/mathias/hyperguild` → `knowledge/` and `wiki/` markdown files | + +- **Scoping**: defaults to `public` collection; client projects filter to `{client}` + `public`. +- **Routing**: brain_answer's LLM uses berget.ai as primary, iguana ollama as + fallback. Both are configurable in the `supervisor/ingestion-deployment.yaml` + on the koala k3s cluster; don't hardcode local-only model names into the + berget URL (see knowledge entry on namespace mismatches). + +### Quick reflex checks + +If you find yourself about to say any of these out loud, you owe yourself a brain query first: + +- "I think the issue might be..." +- "Let me try X and see..." +- "I'll just write a script to..." +- "This is probably a new bug..." +- "Has anyone done this before?" — *yes, probably, go check.* + +## Client work rules + +When working on a project tagged with a client name: +1. Never send code, data, or context to cloud APIs — use local models only +2. Never reference other client projects or their data +3. Keep all artifacts within the client's git org / directory +4. 
Treat everything as confidential unless told otherwise + +## Harness-agnostic principles + +This context is designed to work with any AI coding tool: +- Claude Code, Cursor, Aider, Open WebUI, Charmbracelet Mods/Crush +- Pi Coding Agent, Mistral Vibe, Antigravity +- Any tool that accepts a system prompt or reads a markdown context file + +The canonical source is always `.context/AGENT.md` (root) and `.context/PROJECT.md` (per-project). +Derived files are committed (see *How context propagates* below) so a `git pull` on any host yields full agent context with no setup. + +## How context propagates + +Canonical sources of truth: +- Universal: `~/dev/.context/AGENT.md` (this file) +- Project: `<project>/.context/PROJECT.md` (per-repo) + +Derived files (committed, regenerated by `task context:sync`): +- `CLAUDE.md`, `AGENTS.md`, `.cursorrules`, `.aider.conventions.md`, + `.context/system-prompt.txt` + +Workflow: +1. Edit a canonical file. Run `task context:sync`. Commit canonical and + derived together. Push. +2. On any other host, `git pull` brings both. Claude Code (tree-walking) + uses `CLAUDE.md`; Crush / Pi / Antigravity (cwd-only) use `AGENTS.md`; + Cursor uses `.cursorrules`; Aider uses `.aider.conventions.md`. +3. `task check` runs `context:sync` then asserts `git status --porcelain` + is empty over the derived files (catches both modified-tracked drift + and missing-untracked adapters). A drift fails the check with a + message telling you to stage the regenerated files. + +Behavior rules in this file and per-project rules in `PROJECT.md` apply +unconditionally on every host, every harness. + +## Engineering Skills + +Shared engineering skills are available in `~/dev/.skills/`. Load on demand via the index. + +See `~/dev/.skills/SKILLS_INDEX.md` for the full list with descriptions and "use when" triggers.
+ +Key skills: +- **TDD**: always write tests first — load `tdd` skill +- **Code Review**: load `code-review` skill before any review +- **SOLID/Clean Code**: load `solid` or `clean-code` skill for design work +- **Problem first**: load `problem-analysis` skill before coding non-trivial features + +--- + +# __PROJECT_NAME__ + +## Identity + +- **Name**: __PROJECT_NAME__ +- **Owner**: Mathias +- **Client**: personal +- **Repo**: gitea.d-ma.be/mathias/__PROJECT_NAME__ +- **Status**: active + +## Stack + +Go + ADK + LiteLLM. See `~/dev/.context/AGENT.md` for cross-project conventions. + +## Agent + +TODO: describe what this agent does, what tools it has, and what it's responsible for. + +## Observability + +Traces → Jaeger via `OTLP_ENDPOINT`. Set `ADK_SERVICE_NAME=__PROJECT_NAME__` per deployment. +Spans emitted: `invoke_agent`, `generate_content`. Tool spans require custom callbacks. diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..979e079 --- /dev/null +++ b/.env.example @@ -0,0 +1,9 @@ +# LiteLLM / model +LITELLM_API_KEY=your-key-here +LITELLM_BASE_URL=https://llm-api.d-ma.be/v1 +LITELLM_MODEL=berget/llama-3.3-70b + +# Observability (optional — omit to disable tracing) +OTLP_ENDPOINT=http://jaeger.d-ma.be:4318 +ADK_SERVICE_NAME=__PROJECT_NAME__ +ADK_SERVICE_VERSION=0.1.0 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3878602 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +*.exe +*.exe~ +*.dll +*.so +*.dylib +*.test +*.out +go.work +go.work.sum +.env +bin/ diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..42ccd32 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,255 @@ +# Agent context — Mathias workspace + + + +## Who I am + +I'm Mathias, a digital product manager and technology consultant based in Sweden. +I build software, research emerging tech, and deliver consulting engagements +for clients under NDA. 
I work across AI/ML, financial automation, web applications, +and climate/sustainability tech. + +## How I work with agents + +- I think like a product manager — I care about *why* before *how* +- I want agents to be opinionated and push back, not just execute blindly +- I prefer concise responses; skip ceremony and get to the point +- When I say "build this", I mean production-quality with tests, not a demo +- Ask me before making irreversible changes or adding heavy dependencies +- I work with confidential client data — never send it to cloud APIs unless I explicitly say it's OK + +## Behavior rules + +These rules apply to every task across every project, regardless of harness. + +1. **No assumptions.** Don't hide confusion — surface it. Surface tradeoffs explicitly. + Think before coding; if the problem is unclear, ask or state assumptions before acting. +2. **Minimum viable code.** Solve with the smallest change that works. Nothing + speculative, no "while we're here" cleanups, no premature abstractions. Simplicity first. +3. **Surgical changes.** Touch only what the task requires. Leave unrelated code, + files, and formatting alone. Diffs should be small and reviewable. +4. **Goal-driven execution.** Define clear success criteria up front for every task. + Loop — implement, verify, refine — until those criteria are met. Don't claim + completion without evidence (tests pass, command output, observed behavior). +5. **Trunk-Based Development — commit directly to main.** Every commit is one + logical change (one tool, one fix, one test) with passing tests. Main is always + deployable. Never create long-lived feature branches. + + **Exception — parallel agents on same repo:** If another agent is known to be + actively working on the same repo simultaneously, create a short-lived branch + (`agent/`), finish the task, and merge to main within the same + session. Do not leave agent branches open between sessions. 
+ + **Exception — external contributor or client four-eyes requirement:** Use + PR flow only when a human reviewer outside the project is required. Document + the reason in PROJECT.md. + +## Default stack + +| Layer | Default | Fallback | Last resort | +|-------|---------|----------|-------------| +| Language | Go | Python | TypeScript, Java, C | +| UI | HTMX + Templ | Server-rendered HTML | React (only if SPA is justified) | +| Build | Task (taskfile.dev) | Make | — | +| Containers | Docker Compose (dev), k3s (prod) | — | — | +| DB | PostgreSQL + sqlc | SQLite | — | +| Search | pgvector (vector), BM25 | Qdrant (when >1M vectors or hybrid retrieval) | — | +| Logging | slog (structured) | — | — | +| Testing | Table-driven, testify | — | — | +| Agents (Go) | google.golang.org/adk + pkg/litellm adapter | — | — | + +Exploratory: Rust, Zig — I'll tell you when I want these. + +## Code conventions + +- **Go style**: golines, gofumpt, golangci-lint +- **Errors**: `fmt.Errorf("operation: %w", err)` — never naked, never log-and-return +- **Naming**: stdlib conventions, no stuttering +- **Architecture**: prefer stdlib over frameworks, constructor injection, env-var config parsed into typed structs +- **Git**: conventional commits (`feat:`, `fix:`, `chore:`), commit directly to main, + one logical change per commit, CI is the quality gate +- **Never**: long-lived feature branches, PRs for solo work, direct push without + passing `task check` locally first +- **Security**: no secrets in code, govulncheck before adding deps, SOPS for encrypted config +- **Dependencies**: prefer stdlib. 
testify, slog, templ, sqlc, google.golang.org/adk (agent projects only) are pre-approved; anything else needs justification in the commit message + +## Infrastructure + +Three machines on Tailscale: + +| Machine | Role | Key specs | +|---------|------|-----------| +| koala | GPU inference, heavy compute | RTX 5070, runs k3s + llama-swap + shared postgres18/pgvector | +| iguana | Services, builds | M2 Ultra Mac | +| flamingo | Daily driver, edge | Mac mini, ~/dev is here | + +- **Model routing**: LiteLLM in front of llama-swap (local) + cloud APIs (when permitted) +- **Orchestration**: k3s cluster across all three machines +- **Networking**: Tailscale mesh + +## Project landscape + +All development repos live at `~/dev/` (softlink from `~/Documents/local-dev/`). + +Organized in thematic folders: + +| Folder | Focus | Count | +|--------|-------|-------| +| `GO/` | Go web frameworks, API integrations, learning projects | ~10 | +| `AI/` | ML research, AI frameworks (FinRL, DSPy, crawl4ai) | ~6 | +| `AGENTS/` | Autonomous agents, coding agents, MCP servers, infra | ~15 | +| `QKX/` | Invoice processing, financial automation, payment systems | ~13 | +| `XT/` | Climate data, sustainability (Klimatkollen, Garbo) | ~2 | + +See `~/dev/PROJECT_SUMMARY.md` for detailed descriptions of each project. + +### Key active projects + +- **super-koala** (`AGENTS/`) — multi-component agent stack with LangGraph, DSPy, MCP +- **azure-tiger** (`QKX/`) — invoice extraction → ISO 20022 payment instructions +- **gocrwl** (`AGENTS/`) — Go web crawler with containerized deployment +- **koala-ai-stack** (`AGENTS/`) — local AI server infrastructure management +- **klimatkollen** (`XT/`) — Swedish municipal climate data platform + +## Knowledge base — actively use it + +A persistent brain (BM25 search + LLM-synthesised Q&A) survives across sessions, +hosts, and harnesses. It holds 100+ hard-won entries: infra incident postmortems, +Go pitfalls, framework gotchas, design principles, ADRs. 
**It is not optional +reference material — query it actively, not just when explicitly told.** + +### When to query (treat as a reflex) + +- **Before** starting a non-trivial task — search for prior art with the symptom + AND the system component ("how did we solve X in Y?"). 5 seconds beats 5 hours. +- **When debugging** — search for the error string, the stack frame, the affected + service. Past you may have already paid this tax. +- **Before adopting** a pattern, library, framework, or model name — check if it + was tried and rejected, or what the integration footguns are. +- **When making architectural decisions** — search for the domain + "ADR" or + "decision" to find prior reasoning before re-deriving it. +- **When a recommendation feels novel** — challenge yourself: "has this been + documented?" The brain often has it. + +### When to write + +After you discover something that **future-you would forget** and that **isn't +recoverable from the code, git log, or PR description alone**: + +- Bugs whose root cause is non-obvious and generalisable beyond this project. +- Framework / library / model-name quirks that bit you and would bite anyone. +- Design principles validated under fire (e.g. "every `_get` needs a `_list`"). +- Postmortems for incidents: what broke, why, how diagnosed, what to do next time. + +DON'T write project status, sprint progress, PR summaries, or "what I did this +session" — those rot fast and the originals are in git/gitea anyway. Brain +entries that age well are about *why*, *how to avoid*, and *what to do when*. 
+ +### How to access (per harness) + +| Harness | Query | Write | +|---------|-------|-------| +| **Claude Code, Claude Desktop** | `brain_query` (BM25), `brain_answer` (LLM-synth + sources) MCP tools | `brain_write` MCP tool | +| **Crush, Pi, Antigravity, other MCP-capable** | same MCP server: `ingestion-brain` (via the `mcp__*_brain__*` namespace once authenticated) | same | +| **Anything HTTP-only (curl, scripts)** | `POST https://brain-mcp.d-ma.be/query` with `{"query":"..."}` (auth via `BRAIN_MCP_TOKEN`) | `POST .../write` with `{"content":"...","filename":"..."}` | +| **Browser / human inspection** | `https://gitea.d-ma.be/mathias/hyperguild` → `knowledge/` and `wiki/` markdown files | + +- **Scoping**: defaults to `public` collection; client projects filter to `{client}` + `public`. +- **Routing**: brain_answer's LLM uses berget.ai as primary, iguana ollama as + fallback. Both are configurable in the `supervisor/ingestion-deployment.yaml` + on the koala k3s cluster; don't hardcode local-only model names into the + berget URL (see knowledge entry on namespace mismatches). + +### Quick reflex checks + +If you find yourself about to say any of these out loud, you owe yourself a brain query first: + +- "I think the issue might be..." +- "Let me try X and see..." +- "I'll just write a script to..." +- "This is probably a new bug..." +- "Has anyone done this before?" — *yes, probably, go check.* + +## Client work rules + +When working on a project tagged with a client name: +1. Never send code, data, or context to cloud APIs — use local models only +2. Never reference other client projects or their data +3. Keep all artifacts within the client's git org / directory +4. 
Treat everything as confidential unless told otherwise + +## Harness-agnostic principles + +This context is designed to work with any AI coding tool: +- Claude Code, Cursor, Aider, Open WebUI, Charmbracelet Mods/Crush +- Pi Coding Agent, Mistral Vibe, Antigravity +- Any tool that accepts a system prompt or reads a markdown context file + +The canonical source is always `.context/AGENT.md` (root) and `.context/PROJECT.md` (per-project). +Derived files are committed (see *How context propagates* below) so a `git pull` on any host yields full agent context with no setup. + +## How context propagates + +Canonical sources of truth: +- Universal: `~/dev/.context/AGENT.md` (this file) +- Project: `<project>/.context/PROJECT.md` (per-repo) + +Derived files (committed, regenerated by `task context:sync`): +- `CLAUDE.md`, `AGENTS.md`, `.cursorrules`, `.aider.conventions.md`, + `.context/system-prompt.txt` + +Workflow: +1. Edit a canonical file. Run `task context:sync`. Commit canonical and + derived together. Push. +2. On any other host, `git pull` brings both. Claude Code (tree-walking) + uses `CLAUDE.md`; Crush / Pi / Antigravity (cwd-only) use `AGENTS.md`; + Cursor uses `.cursorrules`; Aider uses `.aider.conventions.md`. +3. `task check` runs `context:sync` then asserts `git status --porcelain` + is empty over the derived files (catches both modified-tracked drift + and missing-untracked adapters). A drift fails the check with a + message telling you to stage the regenerated files. + +Behavior rules in this file and per-project rules in `PROJECT.md` apply +unconditionally on every host, every harness. + +## Engineering Skills + +Shared engineering skills are available in `~/dev/.skills/`. Load on demand via the index. + +See `~/dev/.skills/SKILLS_INDEX.md` for the full list with descriptions and "use when" triggers.
+ +Key skills: +- **TDD**: always write tests first — load `tdd` skill +- **Code Review**: load `code-review` skill before any review +- **SOLID/Clean Code**: load `solid` or `clean-code` skill for design work +- **Problem first**: load `problem-analysis` skill before coding non-trivial features + +--- + +# __PROJECT_NAME__ + +## Identity + +- **Name**: __PROJECT_NAME__ +- **Owner**: Mathias +- **Client**: personal +- **Repo**: gitea.d-ma.be/mathias/__PROJECT_NAME__ +- **Status**: active + +## Stack + +Go + ADK + LiteLLM. See `~/dev/.context/AGENT.md` for cross-project conventions. + +## Agent + +TODO: describe what this agent does, what tools it has, and what it's responsible for. + +## Observability + +Traces → Jaeger via `OTLP_ENDPOINT`. Set `ADK_SERVICE_NAME=__PROJECT_NAME__` per deployment. +Spans emitted: `invoke_agent`, `generate_content`. Tool spans require custom callbacks. diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..b69a2a8 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,22 @@ +# __PROJECT_NAME__ + +## Identity + +- **Name**: __PROJECT_NAME__ +- **Owner**: Mathias +- **Client**: personal +- **Repo**: gitea.d-ma.be/mathias/__PROJECT_NAME__ +- **Status**: active + +## Stack + +Go + ADK + LiteLLM. See `~/dev/.context/AGENT.md` for cross-project conventions. + +## Agent + +TODO: describe what this agent does, what tools it has, and what it's responsible for. + +## Observability + +Traces → Jaeger via `OTLP_ENDPOINT`. Set `ADK_SERVICE_NAME=__PROJECT_NAME__` per deployment. +Spans emitted: `invoke_agent`, `generate_content`. Tool spans require custom callbacks. diff --git a/README.md b/README.md new file mode 100644 index 0000000..99ca285 --- /dev/null +++ b/README.md @@ -0,0 +1,25 @@ +# __PROJECT_NAME__ + +Go agent built on [Google ADK](https://google.golang.org/adk) with a LiteLLM adapter for local model routing. 
+ +## Quick start + +```bash +cp .env.example .env +# edit .env with your LITELLM_API_KEY +go mod tidy +task run +``` + +## Observability + +Set `OTLP_ENDPOINT=http://jaeger.d-ma.be:4318` to emit traces. Each invocation produces: +- `invoke_agent __PROJECT_NAME__` span +- `generate_content <model>` child span with `gen_ai.request.model` attribute + +## Structure + +``` +cmd/__PROJECT_NAME__/ agent entrypoint +pkg/litellm/ OpenAI-compat ADK adapter + OTLP telemetry helper +``` diff --git a/Taskfile.yml b/Taskfile.yml new file mode 100644 index 0000000..2767af8 --- /dev/null +++ b/Taskfile.yml @@ -0,0 +1,36 @@ +version: '3' + +tasks: + build: + desc: Build the agent binary + cmds: [go build -o bin/__PROJECT_NAME__ ./cmd/__PROJECT_NAME__] + + run: + desc: Run the agent (requires .env) + deps: [build] + cmds: [./bin/__PROJECT_NAME__] + + test: + desc: Run all tests + cmds: [go test ./... -race] + + lint: + cmds: [golangci-lint run ./...] + + check: + desc: Lint, vet, and test (used by CI) + cmds: + - golangci-lint run ./... + - go vet ./... + - go test ./...
-race -count=1 + + context:sync: + desc: Regenerate all harness-specific context files + cmds: + - bash scripts/context-sync.sh + context:sync:claude: + cmds: [bash scripts/context-sync.sh claude] + context:sync:agents: + cmds: [bash scripts/context-sync.sh agents] + context:sync:cursor: + cmds: [bash scripts/context-sync.sh cursor] diff --git a/cmd/__PROJECT_NAME__/main.go b/cmd/__PROJECT_NAME__/main.go new file mode 100644 index 0000000..2c3b921 --- /dev/null +++ b/cmd/__PROJECT_NAME__/main.go @@ -0,0 +1,88 @@ +package main + +import ( + "context" + "fmt" + "os" + + "google.golang.org/adk/agent" + "google.golang.org/adk/agent/llmagent" + "google.golang.org/adk/runner" + "google.golang.org/adk/session" + "google.golang.org/genai" + + "__MODULE_PATH__/pkg/litellm" +) + +func main() { + ctx := context.Background() + + shutdown, err := litellm.SetupTelemetry(ctx) + if err != nil { + fmt.Fprintf(os.Stderr, "telemetry: %v\n", err) + os.Exit(1) + } + defer shutdown(ctx) + + llm := litellm.New( + env("LITELLM_MODEL", "berget/llama-3.3-70b"), + env("LITELLM_BASE_URL", "https://llm-api.d-ma.be/v1"), + mustEnv("LITELLM_API_KEY"), + ) + + ag, err := llmagent.New(llmagent.Config{ + Name: "__PROJECT_NAME__", + Description: "TODO: describe what this agent does", + Model: llm, + Instruction: "You are a helpful assistant.", + }) + if err != nil { + fmt.Fprintf(os.Stderr, "agent: %v\n", err) + os.Exit(1) + } + + r, err := runner.New(runner.Config{ + AppName: "__PROJECT_NAME__", + Agent: ag, + SessionService: session.InMemoryService(), + AutoCreateSession: true, + }) + if err != nil { + fmt.Fprintf(os.Stderr, "runner: %v\n", err) + os.Exit(1) + } + + msg := genai.NewContentFromText("Hello!", "user") + events := r.Run(ctx, "user-1", "session-1", msg, agent.RunConfig{}) + + for ev, err := range events { + if err != nil { + fmt.Fprintf(os.Stderr, "run: %v\n", err) + os.Exit(1) + } + if ev == nil || ev.Content == nil { + continue + } + for _, p := range ev.Content.Parts { + if p != nil 
&& p.Text != "" { + fmt.Println(p.Text) + } + } + } +} + +func env(key, fallback string) string { + if v := os.Getenv(key); v != "" { + return v + } + return fallback +} + +func mustEnv(key string) string { + v := os.Getenv(key) + if v == "" { + fmt.Fprintf(os.Stderr, "required env var %s not set\n", key) + os.Exit(1) + } + return v +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..b50538b --- /dev/null +++ b/go.mod @@ -0,0 +1,8 @@ +module __MODULE_PATH__ + +go 1.26 + +require ( + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 + google.golang.org/adk v1.2.0 +) diff --git a/pkg/litellm/model.go b/pkg/litellm/model.go new file mode 100644 index 0000000..6970240 --- /dev/null +++ b/pkg/litellm/model.go @@ -0,0 +1,250 @@ +package litellm + +// Model implements google.golang.org/adk/model.LLM against any +// OpenAI-compatible endpoint (LiteLLM, Ollama, vLLM, etc.). +// +// The official Go ADK v1.x ships only Gemini adapters. This adapter +// implements the official interface directly via net/http — no extra deps. + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "iter" + "net/http" + + adkmodel "google.golang.org/adk/model" + "google.golang.org/genai" +) + +// Model is an ADK-compatible LLM backed by an OpenAI-compatible endpoint. +type Model struct { + name string + baseURL string + apiKey string + client *http.Client +} + +// New creates an OpenAI-compatible ADK model. +// name is the model identifier sent in requests (e.g. "berget/llama-3.3-70b"). +// baseURL is the API base without path (e.g. "https://llm-api.d-ma.be/v1"). 
+func New(name, baseURL, apiKey string) *Model { + return &Model{name: name, baseURL: baseURL, apiKey: apiKey, client: &http.Client{}} +} + +func (m *Model) Name() string { return m.name } + +// --- OpenAI wire types (minimal subset ADK uses) --- + +type oaiMessage struct { + Role string `json:"role"` + Content string `json:"content,omitempty"` + ToolCallID string `json:"tool_call_id,omitempty"` + ToolCalls []oaiToolCall `json:"tool_calls,omitempty"` +} + +type oaiToolCall struct { + ID string `json:"id"` + Type string `json:"type"` + Function oaiFunctionCall `json:"function"` +} + +type oaiFunctionCall struct { + Name string `json:"name"` + Arguments string `json:"arguments"` +} + +type oaiTool struct { + Type string `json:"type"` + Function oaiFunctionDef `json:"function"` +} + +type oaiFunctionDef struct { + Name string `json:"name"` + Description string `json:"description,omitempty"` + Parameters json.RawMessage `json:"parameters,omitempty"` +} + +type oaiRequest struct { + Model string `json:"model"` + Messages []oaiMessage `json:"messages"` + Tools []oaiTool `json:"tools,omitempty"` +} + +type oaiChoice struct { + Message oaiMessage `json:"message"` + FinishReason string `json:"finish_reason"` +} + +type oaiResponse struct { + Choices []oaiChoice `json:"choices"` + Error *struct { + Message string `json:"message"` + } `json:"error,omitempty"` +} + +func (m *Model) GenerateContent(ctx context.Context, req *adkmodel.LLMRequest, _ bool) iter.Seq2[*adkmodel.LLMResponse, error] { + return func(yield func(*adkmodel.LLMResponse, error) bool) { + msgs := contentsToMessages(req.Contents) + tools := adk2oaiTools(req) + + oaiReq := oaiRequest{Model: m.name, Messages: msgs, Tools: tools} + + body, err := json.Marshal(oaiReq) + if err != nil { + yield(nil, fmt.Errorf("marshal: %w", err)) + return + } + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, + m.baseURL+"/chat/completions", bytes.NewReader(body)) + if err != nil { + yield(nil, fmt.Errorf("new 
request: %w", err)) + return + } + httpReq.Header.Set("Content-Type", "application/json") + httpReq.Header.Set("Authorization", "Bearer "+m.apiKey) + + resp, err := m.client.Do(httpReq) + if err != nil { + yield(nil, fmt.Errorf("http: %w", err)) + return + } + defer resp.Body.Close() + + raw, err := io.ReadAll(resp.Body) + if err != nil { + yield(nil, fmt.Errorf("read body: %w", err)) + return + } + if resp.StatusCode != http.StatusOK { + yield(nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(raw))) + return + } + + var oaiResp oaiResponse + if err := json.Unmarshal(raw, &oaiResp); err != nil { + yield(nil, fmt.Errorf("unmarshal: %w", err)) + return + } + if oaiResp.Error != nil { + yield(nil, fmt.Errorf("api error: %s", oaiResp.Error.Message)) + return + } + if len(oaiResp.Choices) == 0 { + yield(nil, fmt.Errorf("no choices in response")) + return + } + + content := oaiChoiceToContent(oaiResp.Choices[0]) + yield(&adkmodel.LLMResponse{Content: content, TurnComplete: true}, nil) + } +} + +func contentsToMessages(contents []*genai.Content) []oaiMessage { + var msgs []oaiMessage + for _, c := range contents { + if c == nil { + continue + } + var textBuf bytes.Buffer + var toolCalls []oaiToolCall + + for _, p := range c.Parts { + if p == nil { + continue + } + if p.Text != "" { + textBuf.WriteString(p.Text) + } + if p.FunctionCall != nil { + argBytes, _ := json.Marshal(p.FunctionCall.Args) + toolCalls = append(toolCalls, oaiToolCall{ + ID: p.FunctionCall.ID, + Type: "function", + Function: oaiFunctionCall{ + Name: p.FunctionCall.Name, + Arguments: string(argBytes), + }, + }) + } + if p.FunctionResponse != nil { + respBytes, _ := json.Marshal(p.FunctionResponse.Response) + msgs = append(msgs, oaiMessage{ + Role: "tool", + Content: string(respBytes), + ToolCallID: p.FunctionResponse.ID, + }) + } + } + + if len(toolCalls) > 0 || textBuf.Len() > 0 { + msg := oaiMessage{Role: c.Role} + if c.Role == "model" { + msg.Role = "assistant" + } + msg.Content = 
textBuf.String() + msg.ToolCalls = toolCalls + msgs = append(msgs, msg) + } + } + return msgs +} + +func adk2oaiTools(req *adkmodel.LLMRequest) []oaiTool { + if len(req.Tools) == 0 { + return nil + } + var tools []oaiTool + for name, def := range req.Tools { + raw, _ := json.Marshal(def) + var m map[string]json.RawMessage + _ = json.Unmarshal(raw, &m) + var desc string + if d, ok := m["description"]; ok { + _ = json.Unmarshal(d, &desc) + } + params := m["parameters"] + // Some endpoints (e.g. Berget) reject null parameters for zero-arg tools. + if len(params) == 0 || string(params) == "null" { + params = json.RawMessage(`{"type":"object","properties":{}}`) + } + tools = append(tools, oaiTool{ + Type: "function", + Function: oaiFunctionDef{ + Name: name, + Description: desc, + Parameters: params, + }, + }) + } + return tools +} + +func oaiChoiceToContent(choice oaiChoice) *genai.Content { + msg := choice.Message + var parts []*genai.Part + + if msg.Content != "" { + parts = append(parts, &genai.Part{Text: msg.Content}) + } + for _, tc := range msg.ToolCalls { + var args map[string]any + _ = json.Unmarshal([]byte(tc.Function.Arguments), &args) + parts = append(parts, &genai.Part{ + FunctionCall: &genai.FunctionCall{ + ID: tc.ID, + Name: tc.Function.Name, + Args: args, + }, + }) + } + + role := msg.Role + if role == "assistant" { + role = "model" + } + return &genai.Content{Role: role, Parts: parts} +} diff --git a/pkg/litellm/telemetry.go b/pkg/litellm/telemetry.go new file mode 100644 index 0000000..0c14a7b --- /dev/null +++ b/pkg/litellm/telemetry.go @@ -0,0 +1,71 @@ +package litellm + +import ( + "context" + "fmt" + "os" + "time" + + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.36.0" + "google.golang.org/adk/telemetry" +) + +// SetupTelemetry wires ADK OTLP tracing from environment variables. 
+// +// Reads: +// - OTLP_ENDPOINT full URL base, e.g. http://jaeger.d-ma.be:4318 (skip if empty) +// - ADK_SERVICE_NAME service name in Jaeger (default: "agent") +// - ADK_SERVICE_VERSION semver label (default: "0.1.0") +// +// Returns a shutdown func to call on exit with a short-timeout context. +// No-op (nil error, noop shutdown) when OTLP_ENDPOINT is unset. +func SetupTelemetry(ctx context.Context) (shutdown func(context.Context) error, err error) { + endpoint := os.Getenv("OTLP_ENDPOINT") + if endpoint == "" { + return func(context.Context) error { return nil }, nil + } + + svcName := os.Getenv("ADK_SERVICE_NAME") + if svcName == "" { + svcName = "agent" + } + svcVersion := os.Getenv("ADK_SERVICE_VERSION") + if svcVersion == "" { + svcVersion = "0.1.0" + } + + exporter, err := otlptracehttp.New(ctx, + otlptracehttp.WithEndpointURL(endpoint+"/v1/traces"), + ) + if err != nil { + return nil, fmt.Errorf("otlp exporter: %w", err) + } + + res, err := resource.New(ctx, + resource.WithAttributes( + semconv.ServiceName(svcName), + semconv.ServiceVersion(svcVersion), + ), + ) + if err != nil { + return nil, fmt.Errorf("resource: %w", err) + } + + providers, err := telemetry.New(ctx, + telemetry.WithSpanProcessors(sdktrace.NewBatchSpanProcessor(exporter)), + telemetry.WithResource(res), + ) + if err != nil { + return nil, fmt.Errorf("telemetry.New: %w", err) + } + providers.SetGlobalOtelProviders() + + return func(ctx context.Context) error { + shutCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + return providers.Shutdown(shutCtx) + }, nil +} diff --git a/scripts/context-sync.sh b/scripts/context-sync.sh new file mode 100755 index 0000000..4f7300e --- /dev/null +++ b/scripts/context-sync.sh @@ -0,0 +1,201 @@ +#!/usr/bin/env bash +# Generates harness-specific context files from .context/PROJECT.md +# Project-level script — run from a project directory. 
#
# For Claude Code: generates project-only CLAUDE.md (it inherits root via tree walk)
# For everything else: concatenates root AGENT.md + project PROJECT.md
#
# Usage: ./scripts/context-sync.sh [--force] [adapter...]
# Task: task context:sync
#
# Adapters: claude, agents, cursor, aider, mcp, and
#           prompt|system|openwebui|owui|generic (all map to the system prompt).
#           With no adapter arguments every adapter is generated.
#
# Override root context: ROOT_CONTEXT=~/dev/.context/AGENT.md ./scripts/context-sync.sh
#
# Requires: jq, but only when a root mcp-servers.json has to be merged.

set -euo pipefail

# Parse --force flag and collect adapter names separately
FORCE=false
ADAPTERS=()
for _arg in "$@"; do
  case "$_arg" in
    --force) FORCE=true ;;
    *) ADAPTERS+=("$_arg") ;;
  esac
done

PROJECT_FILE=".context/PROJECT.md"

#######################################
# Walk up from the parent of the current directory looking for
# .context/AGENT.md. The current directory itself is not inspected: the
# first dirname call happens before the first file test.
# Outputs: path of the first match on stdout, or an empty line if none.
#######################################
find_root_context() {
  local dir
  dir="$(pwd)"
  while [ "$dir" != "/" ]; do
    dir="$(dirname "$dir")"
    if [ -f "$dir/.context/AGENT.md" ]; then
      echo "$dir/.context/AGENT.md"
      return
    fi
  done
  echo ""
}

# An explicit ROOT_CONTEXT from the environment wins over auto-discovery.
ROOT_CONTEXT="${ROOT_CONTEXT:-$(find_root_context)}"

if [ ! -f "$PROJECT_FILE" ]; then
  # Diagnostics belong on stderr, like every other error in this script.
  echo "Error: $PROJECT_FILE not found. Are you in a project root?" >&2
  exit 1
fi

# Pre-flight: reject unfilled {{...}} placeholders unless --force
if [ "$FORCE" = false ]; then
  # grep exits 1 on "no match"; '|| true' keeps set -e from aborting here.
  _placeholders=$(grep -n '{{[^}]*}}' "$PROJECT_FILE" 2>/dev/null || true)
  if [ -n "$_placeholders" ]; then
    echo "Error: unfilled placeholders in $PROJECT_FILE:" >&2
    while IFS= read -r _match; do
      # grep -n output is "<lineno>:<content>"
      _lineno="${_match%%:*}"
      _content="${_match#*:}"
      # Report only the first placeholder on each offending line.
      _token=$(printf '%s' "$_content" | grep -o '{{[^}]*}}' | head -1)
      echo " $PROJECT_FILE:$_lineno: unfilled placeholder $_token" >&2
    done <<< "$_placeholders"
    echo "" >&2
    echo "Fill these placeholders, then re-run: task context:sync" >&2
    echo "To bypass validation: bash scripts/context-sync.sh --force" >&2
    exit 1
  fi
fi

if [ -n "$ROOT_CONTEXT" ] && [ -f "$ROOT_CONTEXT" ]; then
  echo " Root context: $ROOT_CONTEXT"
else
  echo " No root AGENT.md found (project context only)"
fi

# Emit root context + separator.
# Prints nothing when no readable root context is configured.
root_block() {
  if [ -n "$ROOT_CONTEXT" ] && [ -f "$ROOT_CONTEXT" ]; then
    cat "$ROOT_CONTEXT"
    echo ""
    echo "---"
    echo ""
  fi
}

# ── Claude Code ──────────────────────────────────────────────
# Claude Code walks up the tree — it finds ~/dev/CLAUDE.md automatically.
# Project-level CLAUDE.md only needs project-specific context.
generate_claude() {
  cat "$PROJECT_FILE" > CLAUDE.md
  echo " → CLAUDE.md (project-only; Claude Code inherits root)"
}

# ── AGENTS.md (Crush, Pi, Antigravity) ──────────────────────
# These tools read AGENTS.md from cwd but don't walk up.
# Concatenate root + project.
generate_agents() {
  { root_block; cat "$PROJECT_FILE"; } > AGENTS.md
  echo " → AGENTS.md (root + project; Crush, Pi, Antigravity)"
}

# ── Cursor ───────────────────────────────────────────────────
# Same content as AGENTS.md, prefixed with a "do not edit" banner.
generate_cursor() {
  {
    echo "# Cursor rules — auto-generated"
    echo "# Do not edit. Run: task context:sync"
    echo ""
    root_block
    cat "$PROJECT_FILE"
  } > .cursorrules
  echo " → .cursorrules (root + project)"
}

# ── Aider ────────────────────────────────────────────────────
# Writes the conventions file; creates .aider.conf.yml only when it is
# missing, so an existing config is never overwritten.
generate_aider() {
  { root_block; cat "$PROJECT_FILE"; } > .aider.conventions.md
  if [ ! -f .aider.conf.yml ]; then
    cat > .aider.conf.yml << 'YAML'
read: .aider.conventions.md
auto-commits: false
YAML
  fi
  echo " → .aider.conventions.md (root + project)"
}

# ── Generic system prompt (Open WebUI, Mods, etc.) ──────────
generate_system_prompt() {
  {
    echo "You are a coding assistant working on a specific project."
    echo "Follow all conventions from both the root agent context and project context."
    echo ""
    echo "---"
    echo ""
    root_block
    cat "$PROJECT_FILE"
    echo ""
    echo "---"
  } > .context/system-prompt.txt
  echo " → .context/system-prompt.txt (root + project)"
}

# ── MCP config ───────────────────────────────────────────────
# Creates a baseline .context/mcp.json when missing, then merges the
# ".servers" object of a root mcp-servers.json (looked up next to the root
# AGENT.md) into ".mcpServers". Exits 1 if a merge is needed but jq is
# not installed.
generate_mcp() {
  # Ensure baseline file exists with project-specific knowledge server
  if [ ! -f .context/mcp.json ]; then
    cat > .context/mcp.json << 'JSON'
{
  "mcpServers": {
    "knowledge": {
      "url": "http://localhost:3100/mcp",
      "description": "Project knowledge base — vector + graph retrieval"
    }
  }
}
JSON
  fi

  # Merge root mcp-servers.json if found alongside root AGENT.md
  local root_mcp=""
  if [ -n "$ROOT_CONTEXT" ] && [ -f "$ROOT_CONTEXT" ]; then
    local candidate
    candidate="$(dirname "$ROOT_CONTEXT")/mcp-servers.json"
    if [ -f "$candidate" ]; then
      root_mcp="$candidate"
    fi
  fi

  if [ -z "$root_mcp" ]; then
    echo " → .context/mcp.json (exists, no root mcp-servers.json found)"
    return
  fi

  # jq is only needed on this merge path; fail with an actionable message
  # instead of a bare "command not found" from set -e.
  if ! command -v jq > /dev/null 2>&1; then
    echo "Error: jq is required to merge $root_mcp into .context/mcp.json" >&2
    exit 1
  fi

  # Root servers take precedence over project entries on key conflict
  # (jq object addition: the right-hand operand wins).
  local root_servers count updated
  # '// {}' tolerates a root file without a ".servers" key; without it the
  # 'keys' call below aborts on null.
  root_servers=$(jq '.servers // {}' "$root_mcp")
  count=$(printf '%s' "$root_servers" | jq 'keys | length')
  # Build the merged document fully before redirecting, so the input file
  # is not truncated while jq is still reading it.
  updated=$(jq --argjson root "$root_servers" \
    '.mcpServers = (.mcpServers + $root)' \
    .context/mcp.json)
  printf '%s\n' "$updated" > .context/mcp.json
  echo " → .context/mcp.json (merged $count root servers)"
}

echo "Syncing project context from $PROJECT_FILE..."

if [ ${#ADAPTERS[@]} -eq 0 ]; then
  # No adapter named: regenerate everything.
  generate_claude
  generate_agents
  generate_cursor
  generate_aider
  generate_system_prompt
  generate_mcp
else
  for adapter in "${ADAPTERS[@]}"; do
    case "$adapter" in
      claude) generate_claude ;;
      agents) generate_agents ;;
      cursor) generate_cursor ;;
      aider) generate_aider ;;
      prompt|system|openwebui|owui|generic) generate_system_prompt ;;
      mcp) generate_mcp ;;
      *) echo "Unknown adapter: $adapter" >&2; exit 1 ;;
    esac
  done
fi

echo "Done."