Skip to content

Commit

Permalink
change: do not stat file for fileSize during ingestion flow as that happens in knowledge already (#653)
Browse files Browse the repository at this point in the history
  • Loading branch information
iwilltry42 authored Nov 21, 2024
1 parent 0e811e1 commit 8ff27a0
Showing 1 changed file with 1 addition and 8 deletions.
9 changes: 1 addition & 8 deletions pkg/controller/handlers/knowledgefile/knowledgefile.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ func (h *Handler) ingest(ctx context.Context, client kclient.Client, file *v1.Kn

inputName := file.Spec.FileName

// Clean website content (remove headers, footers, etc.)
if source.Spec.Manifest.GetType() == types.KnowledgeSourceTypeWebsite && strings.HasSuffix(file.Spec.FileName, ".md") {
content, err := h.gptScript.ReadFileInWorkspace(ctx, file.Spec.FileName, gptscript.ReadFileInWorkspaceOptions{
WorkspaceID: thread.Status.WorkspaceID,
Expand Down Expand Up @@ -238,21 +239,13 @@ func (h *Handler) ingest(ctx context.Context, client kclient.Client, file *v1.Kn
return &unsupportedErr
}

stat, err := h.gptScript.StatFileInWorkspace(ctx, outputFile(file.Spec.FileName), gptscript.StatFileInWorkspaceOptions{
WorkspaceID: thread.Status.WorkspaceID,
})
if err != nil {
return fmt.Errorf("failed to stat files in workspace ID %s, error: %w", thread.Status.WorkspaceID, err)
}

ingestTask, err := h.invoker.SystemTask(ctx, thread, system.KnowledgeIngestTool, map[string]any{
"input": outputFile(file.Spec.FileName),
"dataset": ks.Namespace + "/" + ks.Name,
"metadata_json": map[string]string{
"url": file.Spec.URL,
"workspaceID": thread.Status.WorkspaceID,
"workspaceFileName": outputFile(file.Spec.FileName),
"fileSize": fmt.Sprintf("%d", stat.Size),
},
})
if err != nil {
Expand Down

0 comments on commit 8ff27a0

Please sign in to comment.