From 0b18f4da667601e9ffb3a4ccdd8fe65721504e61 Mon Sep 17 00:00:00 2001
From: V4LER11 <work@valerii.cc>
Date: Mon, 16 Dec 2024 18:10:00 +0000
Subject: [PATCH 1/8] added gemini models

---
 refact_known_models/passthrough.py            | 49 +++++++++++++++++++
 refact_utils/finetune/utils.py                |  3 ++
 .../webgui/selfhost_fastapi_completions.py    |  7 ++-
 .../webgui/selfhost_model_assigner.py         |  2 +
 refact_webgui/webgui/selfhost_queue.py        |  2 +
 refact_webgui/webgui/selfhost_static.py       |  1 +
 .../webgui/static/tab-model-hosting.html      |  5 ++
 .../webgui/static/tab-model-hosting.js        |  5 ++
 refact_webgui/webgui/static/tab-settings.html |  3 ++
 refact_webgui/webgui/static/tab-settings.js   |  8 +++
 refact_webgui/webgui/tab_models_host.py       |  1 +
 refact_webgui/webgui/tab_settings.py          |  2 +
 setup.py                                      |  2 +-
 13 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/refact_known_models/passthrough.py b/refact_known_models/passthrough.py
index a2f6f0a7..3df5c3ed 100644
--- a/refact_known_models/passthrough.py
+++ b/refact_known_models/passthrough.py
@@ -209,5 +209,54 @@
         "pp1000t_prompt": 150,
         "pp1000t_generated": 600,  # TODO: don't know the price
         "filter_caps": ["chat", "completion"],
+    },
+
+    # gemini and gemma bear the same tokenizer
+    # according to https://medium.com/google-cloud/a-gemini-and-gemma-tokenizer-in-java-e18831ac9677
+    # downloadable tokenizer.json does not exist for gemini, proposed solution to use vertexai lib in python uses web requests
+
+    "gemini-2.0-flash-exp": {
+        "backend": "litellm",
+        "provider": "gemini",
+        "tokenizer_path": "google/gemma-7b",
+        "resolve_as": "gemini-2.0-flash-exp",
+        "T": 1_048_576,
+        "T_out": 8_192,
+        "pp1000t_prompt": 10_000,  # Example price, adjust as needed
+        "pp1000t_generated": 30_000,  # Example price, adjust as needed
+        "filter_caps": ["chat", "tools", "completion", "multimodal"],
+    },
+    "gemini-1.5-flash": {
+        "backend": "litellm",
+        "provider": "gemini",
+        "tokenizer_path": "google/gemma-7b",
+        "resolve_as": "gemini-1.5-flash",
+        "T": 1_048_576,
+        "T_out": 8_192,
+        "pp1000t_prompt": 10_000,  # Example price, adjust as needed
+        "pp1000t_generated": 30_000,  # Example price, adjust as needed
+        "filter_caps": ["chat", "tools", "completion", "multimodal"],
+    },
+    "gemini-1.5-flash-8b": {
+        "backend": "litellm",
+        "provider": "gemini",
+        "tokenizer_path": "google/gemma-7b",
+        "resolve_as": "gemini-1.5-flash-8b",
+        "T": 1_048_576,
+        "T_out": 8_192,
+        "pp1000t_prompt": 10_000,  # Example price, adjust as needed
+        "pp1000t_generated": 30_000,  # Example price, adjust as needed
+        "filter_caps": ["chat", "tools", "completion", "multimodal"],
+    },
+    "gemini-1.5-pro": {
+        "backend": "litellm",
+        "provider": "gemini",
+        "tokenizer_path": "google/gemma-7b",
+        "resolve_as": "gemini-1.5-pro",
+        "T": 2_097_152,
+        "T_out": 8_192,
+        "pp1000t_prompt": 10_000,  # Example price, adjust as needed
+        "pp1000t_generated": 30_000,  # Example price, adjust as needed
+        "filter_caps": ["chat", "tools", "completion", "multimodal"],
     }
 }
diff --git a/refact_utils/finetune/utils.py b/refact_utils/finetune/utils.py
index a196f050..81b753e0 100644
--- a/refact_utils/finetune/utils.py
+++ b/refact_utils/finetune/utils.py
@@ -112,6 +112,9 @@ def _add_results_for_passthrough_provider(provider: str) -> None:
     if data.get('cerebras_api_enable'):
         _add_results_for_passthrough_provider('cerebras')
 
+    if data.get('gemini_api_enable'):
+        _add_results_for_passthrough_provider('gemini')
+
     if data.get('groq_api_enable'):
         _add_results_for_passthrough_provider('groq')
 
diff --git a/refact_webgui/webgui/selfhost_fastapi_completions.py b/refact_webgui/webgui/selfhost_fastapi_completions.py
index 667c9ca5..c3a829d1 100644
--- a/refact_webgui/webgui/selfhost_fastapi_completions.py
+++ b/refact_webgui/webgui/selfhost_fastapi_completions.py
@@ -13,6 +13,7 @@
 from fastapi import APIRouter, HTTPException, Query, Header
 from fastapi.responses import Response, StreamingResponse
 
+from refact_utils.huggingface.utils import huggingface_hub_token
 from refact_utils.scripts import env
 from refact_utils.finetune.utils import running_models_and_loras
 from refact_webgui.webgui.selfhost_model_resolve import resolve_model_context_size
@@ -233,6 +234,7 @@ def _integrations_env_setup(env_var_name: str, api_key_name: str, api_enable_nam
         _integrations_env_setup("ANTHROPIC_API_KEY", "anthropic_api_key", "anthropic_api_enable")
         _integrations_env_setup("GROQ_API_KEY", "groq_api_key", "groq_api_enable")
         _integrations_env_setup("CEREBRAS_API_KEY", "cerebras_api_key", "cerebras_api_enable")
+        _integrations_env_setup("GEMINI_API_KEY", "gemini_api_key", "gemini_api_enable")
 
     def _models_available_dict_rewrite(self, models_available: List[str]) -> Dict[str, Any]:
         rewrite_dict = {}
@@ -337,7 +339,10 @@ async def _passthrough_tokenizer(self, model_path: str) -> str:
         try:
             async with aiohttp.ClientSession() as session:
                 tokenizer_url = f"https://huggingface.co/{model_path}/resolve/main/tokenizer.json"
-                async with session.get(tokenizer_url) as resp:
+                headers = {}
+                if hf_token := huggingface_hub_token():
+                    headers["Authorization"] = f"Bearer {hf_token}"
+                async with session.get(tokenizer_url, headers=headers) as resp:
                     return await resp.text()
         except:
             raise HTTPException(404, detail=f"can't load tokenizer.json for passthrough {model_path}")
diff --git a/refact_webgui/webgui/selfhost_model_assigner.py b/refact_webgui/webgui/selfhost_model_assigner.py
index 7709854e..0569c65a 100644
--- a/refact_webgui/webgui/selfhost_model_assigner.py
+++ b/refact_webgui/webgui/selfhost_model_assigner.py
@@ -186,6 +186,7 @@ def first_run(self):
             "anthropic_api_enable": False,
             "groq_api_enable": False,
             "cerebras_api_enable": False,
+            "gemini_api_enable": False,
         }
         self.models_to_watchdog_configs(default_config)
 
@@ -259,6 +260,7 @@ def model_assignment(self):
             j = json.load(open(env.CONFIG_INFERENCE, "r"))
             j["groq_api_enable"] = j.get("groq_api_enable", False)
             j["cerebras_api_enable"] = j.get("cerebras_api_enable", False)
+            j["gemini_api_enable"] = j.get("gemini_api_enable", False)
         else:
             j = {"model_assign": {}}
 
diff --git a/refact_webgui/webgui/selfhost_queue.py b/refact_webgui/webgui/selfhost_queue.py
index 8dfafb27..13b4a3b0 100644
--- a/refact_webgui/webgui/selfhost_queue.py
+++ b/refact_webgui/webgui/selfhost_queue.py
@@ -68,6 +68,8 @@ def _add_models_for_passthrough_provider(provider):
                 _add_models_for_passthrough_provider('groq')
             if j.get("cerebras_api_enable"):
                 _add_models_for_passthrough_provider('cerebras')
+            if j.get("gemini_api_enable"):
+                _add_models_for_passthrough_provider('gemini')
 
         return self._models_available
 
diff --git a/refact_webgui/webgui/selfhost_static.py b/refact_webgui/webgui/selfhost_static.py
index 097a0861..fa3fd124 100644
--- a/refact_webgui/webgui/selfhost_static.py
+++ b/refact_webgui/webgui/selfhost_static.py
@@ -23,6 +23,7 @@ def __init__(self, *args, **kwargs):
         ]
 
     async def _index(self):
+        print(self.static_folders)
         for spath in self.static_folders:
             fn = os.path.join(spath, "index.html")
             if os.path.exists(fn):
diff --git a/refact_webgui/webgui/static/tab-model-hosting.html b/refact_webgui/webgui/static/tab-model-hosting.html
index 6ecc86d7..ea4a19cc 100644
--- a/refact_webgui/webgui/static/tab-model-hosting.html
+++ b/refact_webgui/webgui/static/tab-model-hosting.html
@@ -46,6 +46,11 @@ <h3>3rd Party APIs</h3>
       <input class="form-check-input" type="checkbox" role="switch" id="enable_cerebras">
       <label class="form-check-label" for="enable_cerebras">Enable Cerebras API</label>
     </div>
+    <div class="form-check form-switch">
+      <input class="form-check-input" type="checkbox" role="switch" id="enable_gemini">
+      <label class="form-check-label" for="enable_gemini">Enable Gemini API</label>
+    </div>
+
     <div class="chat-enabler-status">
       To enable Chat GPT add your API key in the <span id="redirect2credentials" class="main-tab-button fake-link" data-tab="settings">API Keys tab</span>.
     </div>
diff --git a/refact_webgui/webgui/static/tab-model-hosting.js b/refact_webgui/webgui/static/tab-model-hosting.js
index dc2f36d7..ed32070a 100644
--- a/refact_webgui/webgui/static/tab-model-hosting.js
+++ b/refact_webgui/webgui/static/tab-model-hosting.js
@@ -119,6 +119,8 @@ function get_models()
         integration_switch_init('enable_anthropic', models_data['anthropic_api_enable']);
         integration_switch_init('enable_groq', models_data['groq_api_enable']);
         integration_switch_init('enable_cerebras', models_data['cerebras_api_enable']);
+        integration_switch_init('enable_gemini', models_data['gemini_api_enable']);
+
 
         const more_gpus_notification = document.querySelector('.model-hosting-error');
         if(data.hasOwnProperty('more_models_than_gpus') && data.more_models_than_gpus) {
@@ -144,6 +146,8 @@ function save_model_assigned() {
     const anthropic_enable = document.querySelector('#enable_anthropic');
     const groq_enable = document.querySelector('#enable_groq');
     const cerebras_enable = document.querySelector('#enable_cerebras');
+    const gemini_enable = document.querySelector('#enable_gemini');
+
     const data = {
         model_assign: {
             ...models_data.model_assign,
@@ -152,6 +156,7 @@ function save_model_assigned() {
         anthropic_api_enable: anthropic_enable.checked,
         groq_api_enable: groq_enable.checked,
         cerebras_api_enable: cerebras_enable.checked,
+        gemini_api_enable: gemini_enable.checked,
     };
     console.log(data);
     fetch("/tab-host-models-assign", {
diff --git a/refact_webgui/webgui/static/tab-settings.html b/refact_webgui/webgui/static/tab-settings.html
index 18a730b7..511ac395 100644
--- a/refact_webgui/webgui/static/tab-settings.html
+++ b/refact_webgui/webgui/static/tab-settings.html
@@ -10,6 +10,9 @@ <h2>API Integrations</h2>
     <input type="text" name="groq_api_key" value="" class="form-control" id="groq_api_key">
     <label for="cerebras_api_key" class="form-label mt-4">Cerebras API Key</label>
     <input type="text" name="cerebras_api_key" value="" class="form-control" id="cerebras_api_key">
+    <label for="gemini_api_key" class="form-label mt-4">Gemini API Key</label>
+    <input type="text" name="gemini_api_key" value="" class="form-control" id="gemini_api_key">
+
 <!--    <div class="d-flex flex-row-reverse mt-3"><button type="button" class="btn btn-primary" id="integrations-save">Save</button></div>-->
   </div>
 </div>
diff --git a/refact_webgui/webgui/static/tab-settings.js b/refact_webgui/webgui/static/tab-settings.js
index c9597f2e..fa4ddc45 100644
--- a/refact_webgui/webgui/static/tab-settings.js
+++ b/refact_webgui/webgui/static/tab-settings.js
@@ -174,6 +174,8 @@ function save_integration_api_keys() {
     const anthropic_api_key = document.getElementById('anthropic_api_key');
     const groq_api_key = document.getElementById('groq_api_key');
     const cerebras_api_key = document.getElementById('cerebras_api_key');
+    const gemini_api_key = document.getElementById("gemini_api_key");
+
     const huggingface_api_key = document.getElementById('huggingface_api_key');
     fetch("/tab-settings-integrations-save", {
         method: "POST",
@@ -185,6 +187,8 @@ function save_integration_api_keys() {
             anthropic_api_key: anthropic_api_key.getAttribute('data-value'),
             groq_api_key: groq_api_key.getAttribute('data-value'),
             cerebras_api_key: cerebras_api_key.getAttribute('data-value'),
+            gemini_api_key: gemini_api_key.getAttribute("data-value"),
+
             huggingface_api_key: huggingface_api_key.getAttribute('data-value'),
         })
     })
@@ -195,6 +199,8 @@ function save_integration_api_keys() {
         anthropic_api_key.setAttribute('data-saved-value', anthropic_api_key.getAttribute('data-value'))
         groq_api_key.setAttribute('data-saved-value', groq_api_key.getAttribute('data-value'))
         cerebras_api_key.setAttribute('data-saved-value', cerebras_api_key.getAttribute('data-value'))
+        gemini_api_key.setAttribute('data-saved-value', gemini_api_key.getAttribute('data-value'))
+
         huggingface_api_key.setAttribute('data-saved-value', huggingface_api_key.getAttribute('data-value'))
     });
 }
@@ -230,6 +236,8 @@ export function tab_settings_integrations_get() {
             integrations_input_init(document.getElementById('anthropic_api_key'), data['anthropic_api_key']);
             integrations_input_init(document.getElementById('groq_api_key'), data['groq_api_key']);
             integrations_input_init(document.getElementById('cerebras_api_key'), data['cerebras_api_key']);
+            integrations_input_init(document.getElementById('gemini_api_key'), data['gemini_api_key']);
+
             integrations_input_init(document.getElementById('huggingface_api_key'), data['huggingface_api_key']);
         });
 }
diff --git a/refact_webgui/webgui/tab_models_host.py b/refact_webgui/webgui/tab_models_host.py
index 2f1e241b..08dda4b3 100644
--- a/refact_webgui/webgui/tab_models_host.py
+++ b/refact_webgui/webgui/tab_models_host.py
@@ -44,6 +44,7 @@ class TabHostModelsAssign(BaseModel):
     anthropic_api_enable: bool = False
     groq_api_enable: bool = False
     cerebras_api_enable: bool = False
+    gemini_api_enable: bool = False
 
     model_config = ConfigDict(protected_namespaces=())  # avoiding model_ namespace protection
 
diff --git a/refact_webgui/webgui/tab_settings.py b/refact_webgui/webgui/tab_settings.py
index 3be0a3f5..814c2f0a 100644
--- a/refact_webgui/webgui/tab_settings.py
+++ b/refact_webgui/webgui/tab_settings.py
@@ -24,6 +24,8 @@ class Integrations(BaseModel):
         anthropic_api_key: Optional[str] = None
         groq_api_key: Optional[str] = None
         cerebras_api_key: Optional[str] = None
+        gemini_api_key: Optional[str] = None
+
         huggingface_api_key: Optional[str] = None
 
     def __init__(self, models_assigner: ModelAssigner, *args, **kwargs):
diff --git a/setup.py b/setup.py
index fef2a05b..e60c28a8 100644
--- a/setup.py
+++ b/setup.py
@@ -45,7 +45,7 @@ class PyPackage:
                   "bitsandbytes", "safetensors", "peft", "triton",
                   "torchinfo", "mpi4py", "deepspeed>=0.15.3",
                   "sentence-transformers", "huggingface-hub>=0.26.2",
-                  "aiohttp", "setproctitle"],
+                  "aiohttp", "setproctitle", "google-auth>=2.37.0"],
         optional=["ninja", "flash-attn"],
         requires_packages=["refact_known_models", "refact_data_pipeline",
                            "refact_webgui", "refact_utils"],

From 9bcb6599d842ebc2cc8764dbe97ce1df31c7fa71 Mon Sep 17 00:00:00 2001
From: V4LER11 <work@valerii.cc>
Date: Tue, 17 Dec 2024 11:51:19 +0000
Subject: [PATCH 2/8] updated version of litellm to support gemini tool usage

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index e60c28a8..3dcb8525 100644
--- a/setup.py
+++ b/setup.py
@@ -35,7 +35,7 @@ class PyPackage:
     "refact_webgui": PyPackage(
         requires=["aiohttp", "aiofiles", "cryptography", "fastapi==0.100.0", "giturlparse", "pydantic>=2",
                   "starlette==0.27.0", "uvicorn", "uvloop", "termcolor", "python-multipart", "more_itertools",
-                  "scyllapy==1.3.0", "pandas>=2.0.3", "litellm>=1.49.5"],
+                  "scyllapy==1.3.0", "pandas>=2.0.3", "litellm>=1.55.3"],
         requires_packages=["refact_known_models", "refact_utils"],
         data=["webgui/static/*", "webgui/static/components/modals/*",
               "webgui/static/dashboards/*", "webgui/static/assets/*", "webgui/static/utils/*",]),

From 12a385e38054cb1a36b9517eeb6c60040d076ece Mon Sep 17 00:00:00 2001
From: V4LER11 <work@valerii.cc>
Date: Tue, 17 Dec 2024 12:05:07 +0000
Subject: [PATCH 3/8] specified pricing for gemini models

---
 refact_known_models/passthrough.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/refact_known_models/passthrough.py b/refact_known_models/passthrough.py
index 3df5c3ed..90d74080 100644
--- a/refact_known_models/passthrough.py
+++ b/refact_known_models/passthrough.py
@@ -214,6 +214,8 @@
     # gemini and gemma bear the same tokenizer
     # according to https://medium.com/google-cloud/a-gemini-and-gemma-tokenizer-in-java-e18831ac9677
     # downloadable tokenizer.json does not exist for gemini, proposed solution to use vertexai lib in python uses web requests
+    # for pricing consult: https://ai.google.dev/pricing
+    # pricing below assumes that <= 128_000 tokens of context are used
 
     "gemini-2.0-flash-exp": {
         "backend": "litellm",
@@ -222,8 +224,8 @@
         "resolve_as": "gemini-2.0-flash-exp",
         "T": 1_048_576,
         "T_out": 8_192,
-        "pp1000t_prompt": 10_000,  # Example price, adjust as needed
-        "pp1000t_generated": 30_000,  # Example price, adjust as needed
+        "pp1000t_prompt": 75,  # $0.075 / 1M tokens
+        "pp1000t_generated": 300,  # $0.30 / 1M tokens
         "filter_caps": ["chat", "tools", "completion", "multimodal"],
     },
     "gemini-1.5-flash": {
@@ -233,8 +235,8 @@
         "resolve_as": "gemini-1.5-flash",
         "T": 1_048_576,
         "T_out": 8_192,
-        "pp1000t_prompt": 10_000,  # Example price, adjust as needed
-        "pp1000t_generated": 30_000,  # Example price, adjust as needed
+        "pp1000t_prompt": 75,  # $0.075 / 1M tokens
+        "pp1000t_generated": 300,  # $0.30 / 1M tokens
         "filter_caps": ["chat", "tools", "completion", "multimodal"],
     },
     "gemini-1.5-flash-8b": {
@@ -244,8 +246,8 @@
         "resolve_as": "gemini-1.5-flash-8b",
         "T": 1_048_576,
         "T_out": 8_192,
-        "pp1000t_prompt": 10_000,  # Example price, adjust as needed
-        "pp1000t_generated": 30_000,  # Example price, adjust as needed
+        "pp1000t_prompt": 37.5,  # $0.0375 / 1M tokens
+        "pp1000t_generated": 150,  # $0.15 / 1M tokens
         "filter_caps": ["chat", "tools", "completion", "multimodal"],
     },
     "gemini-1.5-pro": {
@@ -255,8 +257,7 @@
         "resolve_as": "gemini-1.5-pro",
         "T": 2_097_152,
         "T_out": 8_192,
-        "pp1000t_prompt": 10_000,  # Example price, adjust as needed
-        "pp1000t_generated": 30_000,  # Example price, adjust as needed
+        "pp1000t_prompt": 1250,  # $1.25 / 1M tokens
+        "pp1000t_generated": 5000,  # $5.00 / 1M tokens
         "filter_caps": ["chat", "tools", "completion", "multimodal"],
-    }
-}
+    }}

From 35b1ad62f744f4889c260a671bd0b1fb1fbef1f5 Mon Sep 17 00:00:00 2001
From: V4LER11 <work@valerii.cc>
Date: Tue, 17 Dec 2024 13:12:02 +0000
Subject: [PATCH 4/8] rollback of refact_webgui/webgui/selfhost_static.py

---
 refact_webgui/webgui/selfhost_static.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/refact_webgui/webgui/selfhost_static.py b/refact_webgui/webgui/selfhost_static.py
index fa3fd124..097a0861 100644
--- a/refact_webgui/webgui/selfhost_static.py
+++ b/refact_webgui/webgui/selfhost_static.py
@@ -23,7 +23,6 @@ def __init__(self, *args, **kwargs):
         ]
 
     async def _index(self):
-        print(self.static_folders)
         for spath in self.static_folders:
             fn = os.path.join(spath, "index.html")
             if os.path.exists(fn):

From 9021d353f294c176232e122d85924258bf5ebc71 Mon Sep 17 00:00:00 2001
From: V4LER11 <work@valerii.cc>
Date: Tue, 17 Dec 2024 13:42:52 +0000
Subject: [PATCH 5/8] changed to Xenova Tokenizers for gemini models

---
 refact_known_models/passthrough.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/refact_known_models/passthrough.py b/refact_known_models/passthrough.py
index 90d74080..486cc61f 100644
--- a/refact_known_models/passthrough.py
+++ b/refact_known_models/passthrough.py
@@ -220,7 +220,7 @@
     "gemini-2.0-flash-exp": {
         "backend": "litellm",
         "provider": "gemini",
-        "tokenizer_path": "google/gemma-7b",
+        "tokenizer_path": "Xenova/gemma2-tokenizer",
         "resolve_as": "gemini-2.0-flash-exp",
         "T": 1_048_576,
         "T_out": 8_192,
@@ -231,7 +231,7 @@
     "gemini-1.5-flash": {
         "backend": "litellm",
         "provider": "gemini",
-        "tokenizer_path": "google/gemma-7b",
+        "tokenizer_path": "Xenova/gemma2-tokenizer",
         "resolve_as": "gemini-1.5-flash",
         "T": 1_048_576,
         "T_out": 8_192,
@@ -242,7 +242,7 @@
     "gemini-1.5-flash-8b": {
         "backend": "litellm",
         "provider": "gemini",
-        "tokenizer_path": "google/gemma-7b",
+        "tokenizer_path": "Xenova/gemma2-tokenizer",
         "resolve_as": "gemini-1.5-flash-8b",
         "T": 1_048_576,
         "T_out": 8_192,
@@ -253,11 +253,12 @@
     "gemini-1.5-pro": {
         "backend": "litellm",
         "provider": "gemini",
-        "tokenizer_path": "google/gemma-7b",
+        "tokenizer_path": "Xenova/gemma2-tokenizer",
         "resolve_as": "gemini-1.5-pro",
         "T": 2_097_152,
         "T_out": 8_192,
         "pp1000t_prompt": 1250,  # $1.25 / 1M tokens
         "pp1000t_generated": 5000,  # $5.00 / 1M tokens
         "filter_caps": ["chat", "tools", "completion", "multimodal"],
-    }}
+    }
+}

From 6b07d6a54e8c4f90d75070c5d5104a1e0882b398 Mon Sep 17 00:00:00 2001
From: V4LER11 <work@valerii.cc>
Date: Tue, 17 Dec 2024 13:57:41 +0000
Subject: [PATCH 6/8] adjusted model dicts

---
 refact_known_models/passthrough.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/refact_known_models/passthrough.py b/refact_known_models/passthrough.py
index 486cc61f..4ed43cb7 100644
--- a/refact_known_models/passthrough.py
+++ b/refact_known_models/passthrough.py
@@ -221,44 +221,44 @@
         "backend": "litellm",
         "provider": "gemini",
         "tokenizer_path": "Xenova/gemma2-tokenizer",
-        "resolve_as": "gemini-2.0-flash-exp",
+        "resolve_as": "gemini/gemini-2.0-flash-exp",
         "T": 1_048_576,
         "T_out": 8_192,
         "pp1000t_prompt": 75,  # $0.075 / 1M tokens
         "pp1000t_generated": 300,  # $0.30 / 1M tokens
-        "filter_caps": ["chat", "tools", "completion", "multimodal"],
+        "filter_caps": ["chat", "tools", "completion", "vision"],
     },
     "gemini-1.5-flash": {
         "backend": "litellm",
         "provider": "gemini",
         "tokenizer_path": "Xenova/gemma2-tokenizer",
-        "resolve_as": "gemini-1.5-flash",
+        "resolve_as": "gemini/gemini-1.5-flash",
         "T": 1_048_576,
         "T_out": 8_192,
         "pp1000t_prompt": 75,  # $0.075 / 1M tokens
         "pp1000t_generated": 300,  # $0.30 / 1M tokens
-        "filter_caps": ["chat", "tools", "completion", "multimodal"],
+        "filter_caps": ["chat", "tools", "completion", "vision"],
     },
     "gemini-1.5-flash-8b": {
         "backend": "litellm",
         "provider": "gemini",
         "tokenizer_path": "Xenova/gemma2-tokenizer",
-        "resolve_as": "gemini-1.5-flash-8b",
+        "resolve_as": "gemini/gemini-1.5-flash-8b",
         "T": 1_048_576,
         "T_out": 8_192,
         "pp1000t_prompt": 37.5,  # $0.0375 / 1M tokens
         "pp1000t_generated": 150,  # $0.15 / 1M tokens
-        "filter_caps": ["chat", "tools", "completion", "multimodal"],
+        "filter_caps": ["chat", "tools", "completion", "vision"],
     },
     "gemini-1.5-pro": {
         "backend": "litellm",
         "provider": "gemini",
         "tokenizer_path": "Xenova/gemma2-tokenizer",
-        "resolve_as": "gemini-1.5-pro",
+        "resolve_as": "gemini/gemini-1.5-pro",
         "T": 2_097_152,
         "T_out": 8_192,
         "pp1000t_prompt": 1250,  # $1.25 / 1M tokens
         "pp1000t_generated": 5000,  # $5.00 / 1M tokens
-        "filter_caps": ["chat", "tools", "completion", "multimodal"],
+        "filter_caps": ["chat", "tools", "completion", "vision"],
     }
 }

From c49b7c9116372098ddee38d49a3dbcb33e7fa002 Mon Sep 17 00:00:00 2001
From: V4LER11 <work@valerii.cc>
Date: Tue, 17 Dec 2024 15:25:27 +0000
Subject: [PATCH 7/8] removed support for gated tokenizers

---
 refact_webgui/webgui/selfhost_fastapi_completions.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/refact_webgui/webgui/selfhost_fastapi_completions.py b/refact_webgui/webgui/selfhost_fastapi_completions.py
index c3a829d1..5695cf2b 100644
--- a/refact_webgui/webgui/selfhost_fastapi_completions.py
+++ b/refact_webgui/webgui/selfhost_fastapi_completions.py
@@ -339,10 +339,7 @@ async def _passthrough_tokenizer(self, model_path: str) -> str:
         try:
             async with aiohttp.ClientSession() as session:
                 tokenizer_url = f"https://huggingface.co/{model_path}/resolve/main/tokenizer.json"
-                headers = {}
-                if hf_token := huggingface_hub_token():
-                    headers["Authorization"] = f"Bearer {hf_token}"
-                async with session.get(tokenizer_url, headers=headers) as resp:
+                async with session.get(tokenizer_url) as resp:
                     return await resp.text()
         except:
             raise HTTPException(404, detail=f"can't load tokenizer.json for passthrough {model_path}")

From bc4c05d7120c7d08753a69b87907b79c04f82ff0 Mon Sep 17 00:00:00 2001
From: V4LER11 <work@valerii.cc>
Date: Tue, 17 Dec 2024 15:48:08 +0000
Subject: [PATCH 8/8] redundant import

---
 refact_webgui/webgui/selfhost_fastapi_completions.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/refact_webgui/webgui/selfhost_fastapi_completions.py b/refact_webgui/webgui/selfhost_fastapi_completions.py
index 5695cf2b..4f9bb8c4 100644
--- a/refact_webgui/webgui/selfhost_fastapi_completions.py
+++ b/refact_webgui/webgui/selfhost_fastapi_completions.py
@@ -13,7 +13,6 @@
 from fastapi import APIRouter, HTTPException, Query, Header
 from fastapi.responses import Response, StreamingResponse
 
-from refact_utils.huggingface.utils import huggingface_hub_token
 from refact_utils.scripts import env
 from refact_utils.finetune.utils import running_models_and_loras
 from refact_webgui.webgui.selfhost_model_resolve import resolve_model_context_size