feat(watcher,api): use extract.Text() for file reading — fixes PDF ingestion
This commit is contained in:
@@ -11,6 +11,7 @@ import (
 	"time"
 	"unicode"
 
+	"github.com/mathiasbq/hyperguild/ingestion/internal/extract"
 	"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
 )
 
@@ -88,12 +89,12 @@ func processFile(ctx context.Context, cfg Config, path, date string) error {
 	filename := filepath.Base(path)
 	source := deriveSource(filename)
 
-	content, err := os.ReadFile(path)
+	content, err := extract.Text(path)
 	if err != nil {
-		return fmt.Errorf("read file: %w", err)
+		return fmt.Errorf("extract text: %w", err)
 	}
 
-	_, runErr := pipeline.Run(ctx, cfg.Pipeline, cfg.BrainDir, string(content), source, false)
+	_, runErr := pipeline.Run(ctx, cfg.Pipeline, cfg.BrainDir, content, source, false)
 	if runErr != nil {
 		// Move to failed/.
 		failedDir := filepath.Join(cfg.BrainDir, "raw", "failed")
Reference in New Issue
Block a user