🔧 fix(assistants): Vision minor fix & Add Docs (#2196)

* 👓 fix(assistants): Only Retrieve Assistant Data for Vision Requests if attachments exist in Host Storage
* docs: add capability
danny-avila · Mar 25, 2024 · f86d80d
1 parent 798e876
commit f86d80d
Show file tree

Hide file tree

Showing 3 changed files with 33 additions and 25 deletions.
diff --git a/api/server/routes/assistants/chat.js b/api/server/routes/assistants/chat.js
@@ -363,16 +363,24 @@ router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res
         return;
       }
 
+      /** @type {MongoFile[]} */
+      const attachments = await req.body.endpointOption.attachments;
+      if (
+        attachments &&
+        attachments.every((attachment) => attachment.source === FileSources.openai)
+      ) {
+        return;
+      }
+
       const assistant = await openai.beta.assistants.retrieve(assistant_id);
       const visionToolIndex = assistant.tools.findIndex(
-        (tool) => tool.function.name === ImageVisionTool.function.name,
+        (tool) => tool?.function && tool?.function?.name === ImageVisionTool.function.name,
       );
 
       if (visionToolIndex === -1) {
         return;
       }
 
-      const attachments = await req.body.endpointOption.attachments;
       let visionMessage = {
         role: 'user',
         content: '',

diff --git a/docs/install/configuration/custom_config.md b/docs/install/configuration/custom_config.md
@@ -126,7 +126,7 @@ endpoints:
     # (optional) Models that support retrieval, will default to latest known OpenAI models that support the feature
     # retrievalModels: ["gpt-4-turbo-preview"]
     # (optional) Assistant Capabilities available to all users. Omit the ones you wish to exclude. Defaults to list below.
-    # capabilities: ["code_interpreter", "retrieval", "actions", "tools"]
+    # capabilities: ["code_interpreter", "retrieval", "actions", "tools", "image_vision"]
   custom:
     - name: "Mistral"
       apiKey: "${MISTRAL_API_KEY}"
@@ -475,7 +475,7 @@ endpoints:
     # (optional) Models that support retrieval, will default to latest known OpenAI models that support the feature
     # retrievalModels: ["gpt-4-turbo-preview"]
     # (optional) Assistant Capabilities available to all users. Omit the ones you wish to exclude. Defaults to list below.
-    # capabilities: ["code_interpreter", "retrieval", "actions", "tools"]
+    # capabilities: ["code_interpreter", "retrieval", "actions", "tools", "image_vision"]
 ```
 > This configuration enables the builder interface for assistants, sets a polling interval of 500ms to check for run updates, and establishes a timeout of 10 seconds for assistant run operations.
 
@@ -538,9 +538,10 @@ In addition to custom endpoints, you can configure settings specific to the assi
 > Specifies the assistant capabilities available to all users for the assistants endpoint.
 
 - **Type**: Array/List of Strings
-- **Example**: `capabilities: ["code_interpreter", "retrieval", "actions", "tools"]`
+- **Example**: `capabilities: ["code_interpreter", "retrieval", "actions", "tools", "image_vision"]`
 - **Description**: Defines the assistant capabilities that are available to all users for the assistants endpoint. You can omit the capabilities you wish to exclude from the list. The available capabilities are:
   - `code_interpreter`: Enables code interpretation capabilities for the assistant.
+  - `image_vision`: Enables unofficial vision support for uploaded images.
   - `retrieval`: Enables retrieval capabilities for the assistant.
   - `actions`: Enables action capabilities for the assistant.
   - `tools`: Enables tool capabilities for the assistant.

diff --git a/librechat.example.yaml b/librechat.example.yaml
@@ -24,25 +24,6 @@ registration:
   socialLogins: ['github', 'google', 'discord', 'openid', 'facebook']
   # allowedDomains:
   # - "gmail.com"
-
-# fileConfig:
-#   endpoints:
-#     assistants:
-#       fileLimit: 5
-#       fileSizeLimit: 10  # Maximum size for an individual file in MB
-#       totalSizeLimit: 50  # Maximum total size for all files in a single request in MB
-#       supportedMimeTypes:
-#         - "image/.*"
-#         - "application/pdf"
-#     openAI:
-#       disabled: true  # Disables file uploading to the OpenAI endpoint
-#     default:
-#       totalSizeLimit: 20
-#     YourCustomEndpointName:
-#       fileLimit: 2
-#       fileSizeLimit: 5
-#   serverFileSizeLimit: 100  # Global server file size limit in MB
-#   avatarSizeLimit: 2  # Limit for user avatar image size in MB
 # rateLimits:
 #   fileUploads:
 #     ipMax: 100
@@ -62,7 +43,7 @@ endpoints:
   #   # (optional) Models that support retrieval, will default to latest known OpenAI models that support the feature
   #   retrievalModels: ["gpt-4-turbo-preview"]
   #   # (optional) Assistant Capabilities available to all users. Omit the ones you wish to exclude. Defaults to list below.
-  #   capabilities: ["code_interpreter", "retrieval", "actions", "tools"]
+  #   capabilities: ["code_interpreter", "retrieval", "actions", "tools", "image_vision"]
   custom:
     # Groq Example
     - name: 'groq'
@@ -135,5 +116,23 @@ endpoints:
       # Recommended: Drop the stop parameter from the request as Openrouter models use a variety of stop tokens.
       dropParams: ['stop']
       modelDisplayLabel: 'OpenRouter'
+# fileConfig:
+#   endpoints:
+#     assistants:
+#       fileLimit: 5
+#       fileSizeLimit: 10  # Maximum size for an individual file in MB
+#       totalSizeLimit: 50  # Maximum total size for all files in a single request in MB
+#       supportedMimeTypes:
+#         - "image/.*"
+#         - "application/pdf"
+#     openAI:
+#       disabled: true  # Disables file uploading to the OpenAI endpoint
+#     default:
+#       totalSizeLimit: 20
+#     YourCustomEndpointName:
+#       fileLimit: 2
+#       fileSizeLimit: 5
+#   serverFileSizeLimit: 100  # Global server file size limit in MB
+#   avatarSizeLimit: 2  # Limit for user avatar image size in MB
 # See the Custom Configuration Guide for more information:
 # https://docs.librechat.ai/install/configuration/custom_config.html