feat(vectorstore): re-embed on file mtime > store updated_at (#23)
Removes the TODO in Sync that left files static after their first embed. Edits to brain/wiki/ and brain/knowledge/ now surface in subsequent syncs without manual /backfill-embeddings calls.

Approach
- Store interface: KnownPaths → KnownPathsWithTime returning path → updated_at. Callers compare against file mtime to detect edits.
- PGStore: SELECT path, updated_at FROM brain_embeddings.
- Sync groups known chunks by parent path and tracks the EARLIEST updated_at per parent. A file is stale when its mtime is after that oldest chunk's timestamp — any chunk older than the file means at least one chunk hasn't been refreshed since the last edit.
- Stale-path rewrite: delete every old chunk for the parent (handles "file shrunk → fewer chunks → orphan rows at higher #NNNN" cleanly), then re-chunk + re-embed + re-upsert.

Tests
- New: TestSync_ReembedsFileWhenMtimeNewer — file mtime forced into the future vs store updated_at; Sync deletes old chunk + upserts fresh one.
- New: TestSync_SkipsFileWhenMtimeOlder — file mtime backdated; Sync is a no-op (no upserts, no deletes).
- Updated: stubStore.known is now map[string]time.Time. A zero value resolves to a far-future sentinel so existing "skip if already known" tests keep passing without per-test setup.
- pg_test: the KnownPaths integration test is renamed to KnownPathsWithTime; it asserts updated_at is non-zero and within 5s of insert wall-clock.

Backward compat
- brain_embeddings rows pre-dating this change carry valid updated_at values (column was always populated via `DEFAULT now()` + ON CONFLICT `updated_at = now()`). No migration needed. Live pod will start re-embedding any file whose source has been edited since its chunks were originally written.

Closes gitea/mathias/hyperguild#23.
This commit is contained in:
@@ -36,7 +36,7 @@ func freshStore(t *testing.T) (*vectorstore.PGStore, context.Context) {
|
||||
t.Cleanup(s.Close)
|
||||
require.NoError(t, s.Init(ctx))
|
||||
// Clean slate per test.
|
||||
_, _ = s.KnownPaths(ctx)
|
||||
_, _ = s.KnownPathsWithTime(ctx)
|
||||
require.NoError(t, s.Delete(ctx, "%test-fixture%"))
|
||||
return s, ctx
|
||||
}
|
||||
@@ -67,15 +67,18 @@ func TestIntegration_UpsertAndSearch(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestIntegration_KnownPaths(t *testing.T) {
|
||||
func TestIntegration_KnownPathsWithTime(t *testing.T) {
|
||||
s, ctx := freshStore(t)
|
||||
before := time.Now()
|
||||
require.NoError(t, s.Upsert(ctx, "wiki/k.md", vec(768, 0.5)))
|
||||
t.Cleanup(func() { _ = s.Delete(ctx, "wiki/k.md") })
|
||||
|
||||
paths, err := s.KnownPaths(ctx)
|
||||
paths, err := s.KnownPathsWithTime(ctx)
|
||||
require.NoError(t, err)
|
||||
_, ok := paths["wiki/k.md"]
|
||||
assert.True(t, ok)
|
||||
at, ok := paths["wiki/k.md"]
|
||||
require.True(t, ok)
|
||||
assert.False(t, at.IsZero(), "updated_at must not be zero")
|
||||
assert.WithinDuration(t, before, at, 5*time.Second, "updated_at must be recent")
|
||||
}
|
||||
|
||||
func TestUpsert_RejectsWrongDimension(t *testing.T) {
|
||||
|
||||
Reference in New Issue
Block a user