From 0b18f4da667601e9ffb3a4ccdd8fe65721504e61 Mon Sep 17 00:00:00 2001 From: V4LER11 Date: Mon, 16 Dec 2024 18:10:00 +0000 Subject: [PATCH 1/8] added gemini models --- refact_known_models/passthrough.py | 49 +++++++++++++++++++ refact_utils/finetune/utils.py | 3 ++ .../webgui/selfhost_fastapi_completions.py | 7 ++- .../webgui/selfhost_model_assigner.py | 2 + refact_webgui/webgui/selfhost_queue.py | 2 + refact_webgui/webgui/selfhost_static.py | 1 + .../webgui/static/tab-model-hosting.html | 5 ++ .../webgui/static/tab-model-hosting.js | 5 ++ refact_webgui/webgui/static/tab-settings.html | 3 ++ refact_webgui/webgui/static/tab-settings.js | 8 +++ refact_webgui/webgui/tab_models_host.py | 1 + refact_webgui/webgui/tab_settings.py | 2 + setup.py | 2 +- 13 files changed, 88 insertions(+), 2 deletions(-) diff --git a/refact_known_models/passthrough.py b/refact_known_models/passthrough.py index a2f6f0a7..3df5c3ed 100644 --- a/refact_known_models/passthrough.py +++ b/refact_known_models/passthrough.py @@ -209,5 +209,54 @@ "pp1000t_prompt": 150, "pp1000t_generated": 600, # TODO: don't know the price "filter_caps": ["chat", "completion"], + }, + + # gemini and gemma bear the same tokenizer + # according to https://medium.com/google-cloud/a-gemini-and-gemma-tokenizer-in-java-e18831ac9677 + # downloadable tokenizer.json does not exist for gemini, proposed solution to use vertexai lib in python uses web requests + + "gemini-2.0-flash-exp": { + "backend": "litellm", + "provider": "gemini", + "tokenizer_path": "google/gemma-7b", + "resolve_as": "gemini-2.0-flash-exp", + "T": 1_048_576, + "T_out": 8_192, + "pp1000t_prompt": 10_000, # Example price, adjust as needed + "pp1000t_generated": 30_000, # Example price, adjust as needed + "filter_caps": ["chat", "tools", "completion", "multimodal"], + }, + "gemini-1.5-flash": { + "backend": "litellm", + "provider": "gemini", + "tokenizer_path": "google/gemma-7b", + "resolve_as": "gemini-1.5-flash", + "T": 1_048_576, + "T_out": 8_192, + "pp1000t_prompt": 10_000, # Example price, adjust as needed + "pp1000t_generated": 30_000, # Example price, adjust as needed + "filter_caps": ["chat", "tools", "completion", "multimodal"], + }, + "gemini-1.5-flash-8b": { + "backend": "litellm", + "provider": "gemini", + "tokenizer_path": "google/gemma-7b", + "resolve_as": "gemini-1.5-flash-8b", + "T": 1_048_576, + "T_out": 8_192, + "pp1000t_prompt": 10_000, # Example price, adjust as needed + "pp1000t_generated": 30_000, # Example price, adjust as needed + "filter_caps": ["chat", "tools", "completion", "multimodal"], + }, + "gemini-1.5-pro": { + "backend": "litellm", + "provider": "gemini", + "tokenizer_path": "google/gemma-7b", + "resolve_as": "gemini-1.5-pro", + "T": 2_097_152, + "T_out": 8_192, + "pp1000t_prompt": 10_000, # Example price, adjust as needed + "pp1000t_generated": 30_000, # Example price, adjust as needed + "filter_caps": ["chat", "tools", "completion", "multimodal"], } } diff --git a/refact_utils/finetune/utils.py b/refact_utils/finetune/utils.py index a196f050..81b753e0 100644 --- a/refact_utils/finetune/utils.py +++ b/refact_utils/finetune/utils.py @@ -112,6 +112,9 @@ def _add_results_for_passthrough_provider(provider: str) -> None: if data.get('cerebras_api_enable'): _add_results_for_passthrough_provider('cerebras') + if data.get('gemini_api_enable'): + _add_results_for_passthrough_provider('gemini') + if data.get('groq_api_enable'): _add_results_for_passthrough_provider('groq') diff --git a/refact_webgui/webgui/selfhost_fastapi_completions.py b/refact_webgui/webgui/selfhost_fastapi_completions.py index 667c9ca5..c3a829d1 100644 --- a/refact_webgui/webgui/selfhost_fastapi_completions.py +++ b/refact_webgui/webgui/selfhost_fastapi_completions.py @@ -13,6 +13,7 @@ from fastapi import APIRouter, HTTPException, Query, Header from fastapi.responses import Response, StreamingResponse +from refact_utils.huggingface.utils import huggingface_hub_token from refact_utils.scripts import env from refact_utils.finetune.utils import running_models_and_loras from refact_webgui.webgui.selfhost_model_resolve import resolve_model_context_size @@ -233,6 +234,7 @@ def _integrations_env_setup(env_var_name: str, api_key_name: str, api_enable_nam _integrations_env_setup("ANTHROPIC_API_KEY", "anthropic_api_key", "anthropic_api_enable") _integrations_env_setup("GROQ_API_KEY", "groq_api_key", "groq_api_enable") _integrations_env_setup("CEREBRAS_API_KEY", "cerebras_api_key", "cerebras_api_enable") + _integrations_env_setup("GEMINI_API_KEY", "gemini_api_key", "gemini_api_enable") def _models_available_dict_rewrite(self, models_available: List[str]) -> Dict[str, Any]: rewrite_dict = {} @@ -337,7 +339,10 @@ async def _passthrough_tokenizer(self, model_path: str) -> str: try: async with aiohttp.ClientSession() as session: tokenizer_url = f"https://huggingface.co/{model_path}/resolve/main/tokenizer.json" - async with session.get(tokenizer_url) as resp: + headers = {} + if hf_token := huggingface_hub_token(): + headers["Authorization"] = f"Bearer {hf_token}" + async with session.get(tokenizer_url, headers=headers) as resp: return await resp.text() except: raise HTTPException(404, detail=f"can't load tokenizer.json for passthrough {model_path}") diff --git a/refact_webgui/webgui/selfhost_model_assigner.py b/refact_webgui/webgui/selfhost_model_assigner.py index 7709854e..0569c65a 100644 --- a/refact_webgui/webgui/selfhost_model_assigner.py +++ b/refact_webgui/webgui/selfhost_model_assigner.py @@ -186,6 +186,7 @@ def first_run(self): "anthropic_api_enable": False, "groq_api_enable": False, "cerebras_api_enable": False, + "gemini_api_enable": False, } self.models_to_watchdog_configs(default_config) @@ -259,6 +260,7 @@ def model_assignment(self): j = json.load(open(env.CONFIG_INFERENCE, "r")) j["groq_api_enable"] = j.get("groq_api_enable", False) j["cerebras_api_enable"] = j.get("cerebras_api_enable", False) + j["gemini_api_enable"] = j.get("gemini_api_enable", False) else: j = {"model_assign": {}} diff --git a/refact_webgui/webgui/selfhost_queue.py b/refact_webgui/webgui/selfhost_queue.py index 8dfafb27..13b4a3b0 100644 --- a/refact_webgui/webgui/selfhost_queue.py +++ b/refact_webgui/webgui/selfhost_queue.py @@ -68,6 +68,8 @@ def _add_models_for_passthrough_provider(provider): _add_models_for_passthrough_provider('groq') if j.get("cerebras_api_enable"): _add_models_for_passthrough_provider('cerebras') + if j.get("gemini_api_enable"): + _add_models_for_passthrough_provider('gemini') return self._models_available diff --git a/refact_webgui/webgui/selfhost_static.py b/refact_webgui/webgui/selfhost_static.py index 097a0861..fa3fd124 100644 --- a/refact_webgui/webgui/selfhost_static.py +++ b/refact_webgui/webgui/selfhost_static.py @@ -23,6 +23,7 @@ def __init__(self, *args, **kwargs): ] async def _index(self): + print(self.static_folders) for spath in self.static_folders: fn = os.path.join(spath, "index.html") if os.path.exists(fn): diff --git a/refact_webgui/webgui/static/tab-model-hosting.html b/refact_webgui/webgui/static/tab-model-hosting.html index 6ecc86d7..ea4a19cc 100644 --- a/refact_webgui/webgui/static/tab-model-hosting.html +++ b/refact_webgui/webgui/static/tab-model-hosting.html @@ -46,6 +46,11 @@

3rd Party APIs

+
+ + +
+
To enable Chat GPT add your API key in the API Keys tab.
diff --git a/refact_webgui/webgui/static/tab-model-hosting.js b/refact_webgui/webgui/static/tab-model-hosting.js index dc2f36d7..ed32070a 100644 --- a/refact_webgui/webgui/static/tab-model-hosting.js +++ b/refact_webgui/webgui/static/tab-model-hosting.js @@ -119,6 +119,8 @@ function get_models() integration_switch_init('enable_anthropic', models_data['anthropic_api_enable']); integration_switch_init('enable_groq', models_data['groq_api_enable']); integration_switch_init('enable_cerebras', models_data['cerebras_api_enable']); + integration_switch_init('enable_gemini', models_data['gemini_api_enable']); + const more_gpus_notification = document.querySelector('.model-hosting-error'); if(data.hasOwnProperty('more_models_than_gpus') && data.more_models_than_gpus) { @@ -144,6 +146,8 @@ function save_model_assigned() { const anthropic_enable = document.querySelector('#enable_anthropic'); const groq_enable = document.querySelector('#enable_groq'); const cerebras_enable = document.querySelector('#enable_cerebras'); + const gemini_enable = document.querySelector('#enable_gemini'); + const data = { model_assign: { ...models_data.model_assign, @@ -152,6 +156,7 @@ function save_model_assigned() { anthropic_api_enable: anthropic_enable.checked, groq_api_enable: groq_enable.checked, cerebras_api_enable: cerebras_enable.checked, + gemini_api_enable: gemini_enable.checked, }; console.log(data); fetch("/tab-host-models-assign", { diff --git a/refact_webgui/webgui/static/tab-settings.html b/refact_webgui/webgui/static/tab-settings.html index 18a730b7..511ac395 100644 --- a/refact_webgui/webgui/static/tab-settings.html +++ b/refact_webgui/webgui/static/tab-settings.html @@ -10,6 +10,9 @@

API Integrations

+ + + diff --git a/refact_webgui/webgui/static/tab-settings.js b/refact_webgui/webgui/static/tab-settings.js index c9597f2e..fa4ddc45 100644 --- a/refact_webgui/webgui/static/tab-settings.js +++ b/refact_webgui/webgui/static/tab-settings.js @@ -174,6 +174,8 @@ function save_integration_api_keys() { const anthropic_api_key = document.getElementById('anthropic_api_key'); const groq_api_key = document.getElementById('groq_api_key'); const cerebras_api_key = document.getElementById('cerebras_api_key'); + const gemini_api_key = document.getElementById("gemini_api_key"); + const huggingface_api_key = document.getElementById('huggingface_api_key'); fetch("/tab-settings-integrations-save", { method: "POST", @@ -185,6 +187,8 @@ function save_integration_api_keys() { anthropic_api_key: anthropic_api_key.getAttribute('data-value'), groq_api_key: groq_api_key.getAttribute('data-value'), cerebras_api_key: cerebras_api_key.getAttribute('data-value'), + gemini_api_key: gemini_api_key.getAttribute("data-value"), + huggingface_api_key: huggingface_api_key.getAttribute('data-value'), }) }) @@ -195,6 +199,8 @@ function save_integration_api_keys() { anthropic_api_key.setAttribute('data-saved-value', anthropic_api_key.getAttribute('data-value')) groq_api_key.setAttribute('data-saved-value', groq_api_key.getAttribute('data-value')) cerebras_api_key.setAttribute('data-saved-value', cerebras_api_key.getAttribute('data-value')) + gemini_api_key.setAttribute('data-saved-value', gemini_api_key.getAttribute('data-value')) + huggingface_api_key.setAttribute('data-saved-value', huggingface_api_key.getAttribute('data-value')) }); } @@ -230,6 +236,8 @@ export function tab_settings_integrations_get() { integrations_input_init(document.getElementById('anthropic_api_key'), data['anthropic_api_key']); integrations_input_init(document.getElementById('groq_api_key'), data['groq_api_key']); integrations_input_init(document.getElementById('cerebras_api_key'), data['cerebras_api_key']); + integrations_input_init(document.getElementById('gemini_api_key'), data['gemini_api_key']); + integrations_input_init(document.getElementById('huggingface_api_key'), data['huggingface_api_key']); }); } diff --git a/refact_webgui/webgui/tab_models_host.py b/refact_webgui/webgui/tab_models_host.py index 2f1e241b..08dda4b3 100644 --- a/refact_webgui/webgui/tab_models_host.py +++ b/refact_webgui/webgui/tab_models_host.py @@ -44,6 +44,7 @@ class TabHostModelsAssign(BaseModel): anthropic_api_enable: bool = False groq_api_enable: bool = False cerebras_api_enable: bool = False + gemini_api_enable: bool = False model_config = ConfigDict(protected_namespaces=()) # avoiding model_ namespace protection diff --git a/refact_webgui/webgui/tab_settings.py b/refact_webgui/webgui/tab_settings.py index 3be0a3f5..814c2f0a 100644 --- a/refact_webgui/webgui/tab_settings.py +++ b/refact_webgui/webgui/tab_settings.py @@ -24,6 +24,8 @@ class Integrations(BaseModel): anthropic_api_key: Optional[str] = None groq_api_key: Optional[str] = None cerebras_api_key: Optional[str] = None + gemini_api_key: Optional[str] = None + huggingface_api_key: Optional[str] = None def __init__(self, models_assigner: ModelAssigner, *args, **kwargs): diff --git a/setup.py b/setup.py index fef2a05b..e60c28a8 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ class PyPackage: "bitsandbytes", "safetensors", "peft", "triton", "torchinfo", "mpi4py", "deepspeed>=0.15.3", "sentence-transformers", "huggingface-hub>=0.26.2", - "aiohttp", "setproctitle"], + "aiohttp", "setproctitle", "google-auth>=2.37.0"], optional=["ninja", "flash-attn"], requires_packages=["refact_known_models", "refact_data_pipeline", "refact_webgui", "refact_utils"], From 9bcb6599d842ebc2cc8764dbe97ce1df31c7fa71 Mon Sep 17 00:00:00 2001 From: V4LER11 Date: Tue, 17 Dec 2024 11:51:19 +0000 Subject: [PATCH 2/8] updated version of litellm to support gemini tool usage --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e60c28a8..3dcb8525 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ class PyPackage: "refact_webgui": PyPackage( requires=["aiohttp", "aiofiles", "cryptography", "fastapi==0.100.0", "giturlparse", "pydantic>=2", "starlette==0.27.0", "uvicorn", "uvloop", "termcolor", "python-multipart", "more_itertools", - "scyllapy==1.3.0", "pandas>=2.0.3", "litellm>=1.49.5"], + "scyllapy==1.3.0", "pandas>=2.0.3", "litellm>=1.55.3"], requires_packages=["refact_known_models", "refact_utils"], data=["webgui/static/*", "webgui/static/components/modals/*", "webgui/static/dashboards/*", "webgui/static/assets/*", "webgui/static/utils/*",]), From 12a385e38054cb1a36b9517eeb6c60040d076ece Mon Sep 17 00:00:00 2001 From: V4LER11 Date: Tue, 17 Dec 2024 12:05:07 +0000 Subject: [PATCH 3/8] specified pricing for gemini models --- refact_known_models/passthrough.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/refact_known_models/passthrough.py b/refact_known_models/passthrough.py index 3df5c3ed..90d74080 100644 --- a/refact_known_models/passthrough.py +++ b/refact_known_models/passthrough.py @@ -214,6 +214,8 @@ # gemini and gemma bear the same tokenizer # according to https://medium.com/google-cloud/a-gemini-and-gemma-tokenizer-in-java-e18831ac9677 # downloadable tokenizer.json does not exist for gemini, proposed solution to use vertexai lib in python uses web requests + # for pricing consult: https://ai.google.dev/pricing + # pricing below is assumed for <= 128_000 context is used "gemini-2.0-flash-exp": { "backend": "litellm", @@ -222,8 +224,8 @@ "resolve_as": "gemini-2.0-flash-exp", "T": 1_048_576, "T_out": 8_192, - "pp1000t_prompt": 10_000, # Example price, adjust as needed - "pp1000t_generated": 30_000, # Example price, adjust as needed + "pp1000t_prompt": 75, # $0.075 / 1M tokens + "pp1000t_generated": 300, # $0.30 / 1M tokens "filter_caps": ["chat", "tools", "completion", "multimodal"], }, "gemini-1.5-flash": { @@ -233,8 +235,8 @@ "resolve_as": "gemini-1.5-flash", "T": 1_048_576, "T_out": 8_192, - "pp1000t_prompt": 10_000, # Example price, adjust as needed - "pp1000t_generated": 30_000, # Example price, adjust as needed + "pp1000t_prompt": 75, # $0.075 / 1M tokens + "pp1000t_generated": 300, # $0.30 / 1M tokens "filter_caps": ["chat", "tools", "completion", "multimodal"], }, "gemini-1.5-flash-8b": { @@ -244,8 +246,8 @@ "resolve_as": "gemini-1.5-flash-8b", "T": 1_048_576, "T_out": 8_192, - "pp1000t_prompt": 10_000, # Example price, adjust as needed - "pp1000t_generated": 30_000, # Example price, adjust as needed + "pp1000t_prompt": 37.5, # $0.0375 / 1M tokens + "pp1000t_generated": 150, # $0.15 / 1M tokens "filter_caps": ["chat", "tools", "completion", "multimodal"], }, "gemini-1.5-pro": { @@ -255,8 +257,7 @@ "resolve_as": "gemini-1.5-pro", "T": 2_097_152, "T_out": 8_192, - "pp1000t_prompt": 10_000, # Example price, adjust as needed - "pp1000t_generated": 30_000, # Example price, adjust as needed + "pp1000t_prompt": 1250, # $1.25 / 1M tokens + "pp1000t_generated": 5000, # $5.00 / 1M tokens "filter_caps": ["chat", "tools", "completion", "multimodal"], - } -} + }} From 35b1ad62f744f4889c260a671bd0b1fb1fbef1f5 Mon Sep 17 00:00:00 2001 From: V4LER11 Date: Tue, 17 Dec 2024 13:12:02 +0000 Subject: [PATCH 4/8] rollback of refact_webgui/webgui/selfhost_static.py --- refact_webgui/webgui/selfhost_static.py | 1 - 1 file changed, 1 deletion(-) diff --git a/refact_webgui/webgui/selfhost_static.py b/refact_webgui/webgui/selfhost_static.py index fa3fd124..097a0861 100644 --- a/refact_webgui/webgui/selfhost_static.py +++ b/refact_webgui/webgui/selfhost_static.py @@ -23,7 +23,6 @@ def __init__(self, *args, **kwargs): ] async def _index(self): - print(self.static_folders) for spath in self.static_folders: fn = os.path.join(spath, "index.html") if os.path.exists(fn): From 9021d353f294c176232e122d85924258bf5ebc71 Mon Sep 17 00:00:00 2001 From: V4LER11 Date: Tue, 17 Dec 2024 13:42:52 +0000 Subject: [PATCH 5/8] changed to Xenova Tokenizers for gemini models --- refact_known_models/passthrough.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/refact_known_models/passthrough.py b/refact_known_models/passthrough.py index 90d74080..486cc61f 100644 --- a/refact_known_models/passthrough.py +++ b/refact_known_models/passthrough.py @@ -220,7 +220,7 @@ "gemini-2.0-flash-exp": { "backend": "litellm", "provider": "gemini", - "tokenizer_path": "google/gemma-7b", + "tokenizer_path": "Xenova/gemma2-tokenizer", "resolve_as": "gemini-2.0-flash-exp", "T": 1_048_576, "T_out": 8_192, @@ -231,7 +231,7 @@ "gemini-1.5-flash": { "backend": "litellm", "provider": "gemini", - "tokenizer_path": "google/gemma-7b", + "tokenizer_path": "Xenova/gemma2-tokenizer", "resolve_as": "gemini-1.5-flash", "T": 1_048_576, "T_out": 8_192, @@ -242,7 +242,7 @@ "gemini-1.5-flash-8b": { "backend": "litellm", "provider": "gemini", - "tokenizer_path": "google/gemma-7b", + "tokenizer_path": "Xenova/gemma2-tokenizer", "resolve_as": "gemini-1.5-flash-8b", "T": 1_048_576, "T_out": 8_192, @@ -253,11 +253,12 @@ "gemini-1.5-pro": { "backend": "litellm", "provider": "gemini", - "tokenizer_path": "google/gemma-7b", + "tokenizer_path": "Xenova/gemma2-tokenizer", "resolve_as": "gemini-1.5-pro", "T": 2_097_152, "T_out": 8_192, "pp1000t_prompt": 1250, # $1.25 / 1M tokens "pp1000t_generated": 5000, # $5.00 / 1M tokens "filter_caps": ["chat", "tools", "completion", "multimodal"], - }} + } +} From 6b07d6a54e8c4f90d75070c5d5104a1e0882b398 Mon Sep 17 00:00:00 2001 From: V4LER11 Date: Tue, 17 Dec 2024 13:57:41 +0000 Subject: [PATCH 6/8] adjusted model dicts --- refact_known_models/passthrough.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/refact_known_models/passthrough.py b/refact_known_models/passthrough.py index 486cc61f..4ed43cb7 100644 --- a/refact_known_models/passthrough.py +++ b/refact_known_models/passthrough.py @@ -221,44 +221,44 @@ "backend": "litellm", "provider": "gemini", "tokenizer_path": "Xenova/gemma2-tokenizer", - "resolve_as": "gemini-2.0-flash-exp", + "resolve_as": "gemini/gemini-2.0-flash-exp", "T": 1_048_576, "T_out": 8_192, "pp1000t_prompt": 75, # $0.075 / 1M tokens "pp1000t_generated": 300, # $0.30 / 1M tokens - "filter_caps": ["chat", "tools", "completion", "multimodal"], + "filter_caps": ["chat", "tools", "completion", "vision"], }, "gemini-1.5-flash": { "backend": "litellm", "provider": "gemini", "tokenizer_path": "Xenova/gemma2-tokenizer", - "resolve_as": "gemini-1.5-flash", + "resolve_as": "gemini/gemini-1.5-flash", "T": 1_048_576, "T_out": 8_192, "pp1000t_prompt": 75, # $0.075 / 1M tokens "pp1000t_generated": 300, # $0.30 / 1M tokens - "filter_caps": ["chat", "tools", "completion", "multimodal"], + "filter_caps": ["chat", "tools", "completion", "vision"], }, "gemini-1.5-flash-8b": { "backend": "litellm", "provider": "gemini", "tokenizer_path": "Xenova/gemma2-tokenizer", - "resolve_as": "gemini-1.5-flash-8b", + "resolve_as": "gemini/gemini-1.5-flash-8b", "T": 1_048_576, "T_out": 8_192, "pp1000t_prompt": 37.5, # $0.0375 / 1M tokens "pp1000t_generated": 150, # $0.15 / 1M tokens - "filter_caps": ["chat", "tools", "completion", "multimodal"], + "filter_caps": ["chat", "tools", "completion", "vision"], }, "gemini-1.5-pro": { "backend": "litellm", "provider": "gemini", "tokenizer_path": "Xenova/gemma2-tokenizer", - "resolve_as": "gemini-1.5-pro", + "resolve_as": "gemini/gemini-1.5-pro", "T": 2_097_152, "T_out": 8_192, "pp1000t_prompt": 1250, # $1.25 / 1M tokens "pp1000t_generated": 5000, # $5.00 / 1M tokens - "filter_caps": ["chat", "tools", "completion", "multimodal"], + "filter_caps": ["chat", "tools", "completion", "vision"], } } From c49b7c9116372098ddee38d49a3dbcb33e7fa002 Mon Sep 17 00:00:00 2001 From: V4LER11 Date: Tue, 17 Dec 2024 15:25:27 +0000 Subject: [PATCH 7/8] removed support for gated tokenizers --- refact_webgui/webgui/selfhost_fastapi_completions.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/refact_webgui/webgui/selfhost_fastapi_completions.py b/refact_webgui/webgui/selfhost_fastapi_completions.py index c3a829d1..5695cf2b 100644 --- a/refact_webgui/webgui/selfhost_fastapi_completions.py +++ b/refact_webgui/webgui/selfhost_fastapi_completions.py @@ -339,10 +339,7 @@ async def _passthrough_tokenizer(self, model_path: str) -> str: try: async with aiohttp.ClientSession() as session: tokenizer_url = f"https://huggingface.co/{model_path}/resolve/main/tokenizer.json" - headers = {} - if hf_token := huggingface_hub_token(): - headers["Authorization"] = f"Bearer {hf_token}" - async with session.get(tokenizer_url, headers=headers) as resp: + async with session.get(tokenizer_url) as resp: return await resp.text() except: raise HTTPException(404, detail=f"can't load tokenizer.json for passthrough {model_path}") From bc4c05d7120c7d08753a69b87907b79c04f82ff0 Mon Sep 17 00:00:00 2001 From: V4LER11 Date: Tue, 17 Dec 2024 15:48:08 +0000 Subject: [PATCH 8/8] redundant import --- refact_webgui/webgui/selfhost_fastapi_completions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/refact_webgui/webgui/selfhost_fastapi_completions.py b/refact_webgui/webgui/selfhost_fastapi_completions.py index 5695cf2b..4f9bb8c4 100644 --- a/refact_webgui/webgui/selfhost_fastapi_completions.py +++ b/refact_webgui/webgui/selfhost_fastapi_completions.py @@ -13,7 +13,6 @@ from fastapi import APIRouter, HTTPException, Query, Header from fastapi.responses import Response, StreamingResponse -from refact_utils.huggingface.utils import huggingface_hub_token from refact_utils.scripts import env from refact_utils.finetune.utils import running_models_and_loras from refact_webgui.webgui.selfhost_model_resolve import resolve_model_context_size