// Package vectorstore stores brain note embeddings in pgvector on the // shared postgres18 instance. One row per markdown path, cosine-distance // indexed via HNSW for sub-millisecond top-k retrieval. package vectorstore import ( "context" "errors" "fmt" "strings" "time" "github.com/jackc/pgx/v5" "github.com/jackc/pgx/v5/pgxpool" ) // Hit is a single result from a cosine-distance search. type Hit struct { Path string Distance float64 // 0 = identical, 2 = opposite } // PGStore is a pgvector-backed embeddings store. Construct with New and // call Init once to create the table + HNSW index. Use Close to release // the underlying pool. type PGStore struct { pool *pgxpool.Pool } // New opens a connection pool against dsn (a libpq-style URL). Caller // owns the resulting *PGStore and must invoke Close. func New(ctx context.Context, dsn string) (*PGStore, error) { pool, err := pgxpool.New(ctx, dsn) if err != nil { return nil, fmt.Errorf("pgxpool: %w", err) } if err := pool.Ping(ctx); err != nil { pool.Close() return nil, fmt.Errorf("ping: %w", err) } return &PGStore{pool: pool}, nil } // Close releases the underlying connection pool. func (s *PGStore) Close() { if s.pool != nil { s.pool.Close() } } // Init creates the brain_embeddings table and its HNSW index if they // don't already exist. Safe to call on every startup. Assumes the // `vector` extension is already installed (one-time DBA setup; see // scripts/brain-embeddings-init.sql). func (s *PGStore) Init(ctx context.Context) error { const ddl = ` CREATE TABLE IF NOT EXISTS brain_embeddings ( path TEXT PRIMARY KEY, embedding vector(768) NOT NULL, updated_at TIMESTAMPTZ NOT NULL DEFAULT now() ); CREATE INDEX IF NOT EXISTS brain_embeddings_embedding_idx ON brain_embeddings USING hnsw (embedding vector_cosine_ops); ` _, err := s.pool.Exec(ctx, ddl) return err } // Upsert inserts or replaces the embedding for path. Embedding must be // 768-dim (nomic-embed-text). Caller is responsible for normalising // paths to forward-slash form. func (s *PGStore) Upsert(ctx context.Context, path string, embedding []float32) error { if len(embedding) != 768 { return fmt.Errorf("expected 768-dim embedding, got %d", len(embedding)) } _, err := s.pool.Exec(ctx, ` INSERT INTO brain_embeddings (path, embedding, updated_at) VALUES ($1, $2, now()) ON CONFLICT (path) DO UPDATE SET embedding = EXCLUDED.embedding, updated_at = now() `, path, vectorLiteral(embedding)) return err } // Delete removes the row at path. No-op when the row doesn't exist. func (s *PGStore) Delete(ctx context.Context, path string) error { _, err := s.pool.Exec(ctx, `DELETE FROM brain_embeddings WHERE path = $1`, path) return err } // Search returns the top-limit nearest paths by cosine distance. func (s *PGStore) Search(ctx context.Context, query []float32, limit int) ([]Hit, error) { if len(query) != 768 { return nil, fmt.Errorf("expected 768-dim query, got %d", len(query)) } if limit <= 0 { limit = 10 } rows, err := s.pool.Query(ctx, ` SELECT path, embedding <=> $1 AS distance FROM brain_embeddings ORDER BY embedding <=> $1 LIMIT $2 `, vectorLiteral(query), limit) if err != nil { return nil, fmt.Errorf("query: %w", err) } defer rows.Close() var hits []Hit for rows.Next() { var h Hit if err := rows.Scan(&h.Path, &h.Distance); err != nil { return nil, fmt.Errorf("scan: %w", err) } hits = append(hits, h) } if err := rows.Err(); err != nil && !errors.Is(err, pgx.ErrNoRows) { return nil, err } return hits, nil } // KnownPathsWithTime returns every embedded chunk path paired with the // row's updated_at. Sync uses the timestamps to decide whether a file // has been edited since its chunks were last embedded — when the file's // mtime exceeds the oldest chunk's updated_at, the file is re-embedded. func (s *PGStore) KnownPathsWithTime(ctx context.Context) (map[string]time.Time, error) { rows, err := s.pool.Query(ctx, `SELECT path, updated_at FROM brain_embeddings`) if err != nil { return nil, fmt.Errorf("query paths: %w", err) } defer rows.Close() out := make(map[string]time.Time) for rows.Next() { var ( p string t time.Time ) if err := rows.Scan(&p, &t); err != nil { return nil, err } out[p] = t } return out, rows.Err() } // vectorLiteral renders a Go float32 slice as the literal representation // pgvector accepts as a parametric input: `[v1,v2,...,vN]`. func vectorLiteral(v []float32) string { var b strings.Builder b.WriteByte('[') for i, x := range v { if i > 0 { b.WriteByte(',') } fmt.Fprintf(&b, "%g", x) } b.WriteByte(']') return b.String() }