diff --git a/refact_known_models/passthrough.py b/refact_known_models/passthrough.py
index a2f6f0a7..4ed43cb7 100644
--- a/refact_known_models/passthrough.py
+++ b/refact_known_models/passthrough.py
@@ -209,5 +209,56 @@
"pp1000t_prompt": 150,
"pp1000t_generated": 600, # TODO: don't know the price
"filter_caps": ["chat", "completion"],
+ },
+
+ # gemini and gemma share the same tokenizer
+ # according to https://medium.com/google-cloud/a-gemini-and-gemma-tokenizer-in-java-e18831ac9677
+ # a downloadable tokenizer.json does not exist for gemini; the proposed python solution uses the vertexai lib, which relies on web requests
+ # for pricing consult: https://ai.google.dev/pricing
+ # pricing below assumes a context of <= 128_000 tokens
+
+ "gemini-2.0-flash-exp": {
+ "backend": "litellm",
+ "provider": "gemini",
+ "tokenizer_path": "Xenova/gemma2-tokenizer",
+ "resolve_as": "gemini/gemini-2.0-flash-exp",
+ "T": 1_048_576,
+ "T_out": 8_192,
+ "pp1000t_prompt": 75, # $0.075 / 1M tokens
+ "pp1000t_generated": 300, # $0.30 / 1M tokens
+ "filter_caps": ["chat", "tools", "completion", "vision"],
+ },
+ "gemini-1.5-flash": {
+ "backend": "litellm",
+ "provider": "gemini",
+ "tokenizer_path": "Xenova/gemma2-tokenizer",
+ "resolve_as": "gemini/gemini-1.5-flash",
+ "T": 1_048_576,
+ "T_out": 8_192,
+ "pp1000t_prompt": 75, # $0.075 / 1M tokens
+ "pp1000t_generated": 300, # $0.30 / 1M tokens
+ "filter_caps": ["chat", "tools", "completion", "vision"],
+ },
+ "gemini-1.5-flash-8b": {
+ "backend": "litellm",
+ "provider": "gemini",
+ "tokenizer_path": "Xenova/gemma2-tokenizer",
+ "resolve_as": "gemini/gemini-1.5-flash-8b",
+ "T": 1_048_576,
+ "T_out": 8_192,
+ "pp1000t_prompt": 37.5, # $0.0375 / 1M tokens
+ "pp1000t_generated": 150, # $0.15 / 1M tokens
+ "filter_caps": ["chat", "tools", "completion", "vision"],
+ },
+ "gemini-1.5-pro": {
+ "backend": "litellm",
+ "provider": "gemini",
+ "tokenizer_path": "Xenova/gemma2-tokenizer",
+ "resolve_as": "gemini/gemini-1.5-pro",
+ "T": 2_097_152,
+ "T_out": 8_192,
+ "pp1000t_prompt": 1250, # $1.25 / 1M tokens
+ "pp1000t_generated": 5000, # $5.00 / 1M tokens
+ "filter_caps": ["chat", "tools", "completion", "vision"],
}
}
diff --git a/refact_utils/finetune/utils.py b/refact_utils/finetune/utils.py
index a196f050..81b753e0 100644
--- a/refact_utils/finetune/utils.py
+++ b/refact_utils/finetune/utils.py
@@ -112,6 +112,9 @@ def _add_results_for_passthrough_provider(provider: str) -> None:
if data.get('cerebras_api_enable'):
_add_results_for_passthrough_provider('cerebras')
+ if data.get('gemini_api_enable'):
+ _add_results_for_passthrough_provider('gemini')
+
if data.get('groq_api_enable'):
_add_results_for_passthrough_provider('groq')
diff --git a/refact_webgui/webgui/selfhost_fastapi_completions.py b/refact_webgui/webgui/selfhost_fastapi_completions.py
index 667c9ca5..4f9bb8c4 100644
--- a/refact_webgui/webgui/selfhost_fastapi_completions.py
+++ b/refact_webgui/webgui/selfhost_fastapi_completions.py
@@ -233,6 +233,7 @@ def _integrations_env_setup(env_var_name: str, api_key_name: str, api_enable_nam
_integrations_env_setup("ANTHROPIC_API_KEY", "anthropic_api_key", "anthropic_api_enable")
_integrations_env_setup("GROQ_API_KEY", "groq_api_key", "groq_api_enable")
_integrations_env_setup("CEREBRAS_API_KEY", "cerebras_api_key", "cerebras_api_enable")
+ _integrations_env_setup("GEMINI_API_KEY", "gemini_api_key", "gemini_api_enable")
def _models_available_dict_rewrite(self, models_available: List[str]) -> Dict[str, Any]:
rewrite_dict = {}
diff --git a/refact_webgui/webgui/selfhost_model_assigner.py b/refact_webgui/webgui/selfhost_model_assigner.py
index 7709854e..0569c65a 100644
--- a/refact_webgui/webgui/selfhost_model_assigner.py
+++ b/refact_webgui/webgui/selfhost_model_assigner.py
@@ -186,6 +186,7 @@ def first_run(self):
"anthropic_api_enable": False,
"groq_api_enable": False,
"cerebras_api_enable": False,
+ "gemini_api_enable": False,
}
self.models_to_watchdog_configs(default_config)
@@ -259,6 +260,7 @@ def model_assignment(self):
j = json.load(open(env.CONFIG_INFERENCE, "r"))
j["groq_api_enable"] = j.get("groq_api_enable", False)
j["cerebras_api_enable"] = j.get("cerebras_api_enable", False)
+ j["gemini_api_enable"] = j.get("gemini_api_enable", False)
else:
j = {"model_assign": {}}
diff --git a/refact_webgui/webgui/selfhost_queue.py b/refact_webgui/webgui/selfhost_queue.py
index 8dfafb27..13b4a3b0 100644
--- a/refact_webgui/webgui/selfhost_queue.py
+++ b/refact_webgui/webgui/selfhost_queue.py
@@ -68,6 +68,8 @@ def _add_models_for_passthrough_provider(provider):
_add_models_for_passthrough_provider('groq')
if j.get("cerebras_api_enable"):
_add_models_for_passthrough_provider('cerebras')
+ if j.get("gemini_api_enable"):
+ _add_models_for_passthrough_provider('gemini')
return self._models_available
diff --git a/refact_webgui/webgui/static/tab-model-hosting.html b/refact_webgui/webgui/static/tab-model-hosting.html
index 6ecc86d7..ea4a19cc 100644
--- a/refact_webgui/webgui/static/tab-model-hosting.html
+++ b/refact_webgui/webgui/static/tab-model-hosting.html
@@ -46,6 +46,11 @@
3rd Party APIs
+
+
+
+
+
To enable Chat GPT add your API key in the API Keys tab.
diff --git a/refact_webgui/webgui/static/tab-model-hosting.js b/refact_webgui/webgui/static/tab-model-hosting.js
index dc2f36d7..ed32070a 100644
--- a/refact_webgui/webgui/static/tab-model-hosting.js
+++ b/refact_webgui/webgui/static/tab-model-hosting.js
@@ -119,6 +119,8 @@ function get_models()
integration_switch_init('enable_anthropic', models_data['anthropic_api_enable']);
integration_switch_init('enable_groq', models_data['groq_api_enable']);
integration_switch_init('enable_cerebras', models_data['cerebras_api_enable']);
+ integration_switch_init('enable_gemini', models_data['gemini_api_enable']);
+
const more_gpus_notification = document.querySelector('.model-hosting-error');
if(data.hasOwnProperty('more_models_than_gpus') && data.more_models_than_gpus) {
@@ -144,6 +146,8 @@ function save_model_assigned() {
const anthropic_enable = document.querySelector('#enable_anthropic');
const groq_enable = document.querySelector('#enable_groq');
const cerebras_enable = document.querySelector('#enable_cerebras');
+ const gemini_enable = document.querySelector('#enable_gemini');
+
const data = {
model_assign: {
...models_data.model_assign,
@@ -152,6 +156,7 @@ function save_model_assigned() {
anthropic_api_enable: anthropic_enable.checked,
groq_api_enable: groq_enable.checked,
cerebras_api_enable: cerebras_enable.checked,
+ gemini_api_enable: gemini_enable.checked,
};
console.log(data);
fetch("/tab-host-models-assign", {
diff --git a/refact_webgui/webgui/static/tab-settings.html b/refact_webgui/webgui/static/tab-settings.html
index 18a730b7..511ac395 100644
--- a/refact_webgui/webgui/static/tab-settings.html
+++ b/refact_webgui/webgui/static/tab-settings.html
@@ -10,6 +10,9 @@ API Integrations
+
+
+
diff --git a/refact_webgui/webgui/static/tab-settings.js b/refact_webgui/webgui/static/tab-settings.js
index c9597f2e..fa4ddc45 100644
--- a/refact_webgui/webgui/static/tab-settings.js
+++ b/refact_webgui/webgui/static/tab-settings.js
@@ -174,6 +174,8 @@ function save_integration_api_keys() {
const anthropic_api_key = document.getElementById('anthropic_api_key');
const groq_api_key = document.getElementById('groq_api_key');
const cerebras_api_key = document.getElementById('cerebras_api_key');
+ const gemini_api_key = document.getElementById("gemini_api_key");
+
const huggingface_api_key = document.getElementById('huggingface_api_key');
fetch("/tab-settings-integrations-save", {
method: "POST",
@@ -185,6 +187,8 @@ function save_integration_api_keys() {
anthropic_api_key: anthropic_api_key.getAttribute('data-value'),
groq_api_key: groq_api_key.getAttribute('data-value'),
cerebras_api_key: cerebras_api_key.getAttribute('data-value'),
+ gemini_api_key: gemini_api_key.getAttribute("data-value"),
+
huggingface_api_key: huggingface_api_key.getAttribute('data-value'),
})
})
@@ -195,6 +199,8 @@ function save_integration_api_keys() {
anthropic_api_key.setAttribute('data-saved-value', anthropic_api_key.getAttribute('data-value'))
groq_api_key.setAttribute('data-saved-value', groq_api_key.getAttribute('data-value'))
cerebras_api_key.setAttribute('data-saved-value', cerebras_api_key.getAttribute('data-value'))
+ gemini_api_key.setAttribute('data-saved-value', gemini_api_key.getAttribute('data-value'))
+
huggingface_api_key.setAttribute('data-saved-value', huggingface_api_key.getAttribute('data-value'))
});
}
@@ -230,6 +236,8 @@ export function tab_settings_integrations_get() {
integrations_input_init(document.getElementById('anthropic_api_key'), data['anthropic_api_key']);
integrations_input_init(document.getElementById('groq_api_key'), data['groq_api_key']);
integrations_input_init(document.getElementById('cerebras_api_key'), data['cerebras_api_key']);
+ integrations_input_init(document.getElementById('gemini_api_key'), data['gemini_api_key']);
+
integrations_input_init(document.getElementById('huggingface_api_key'), data['huggingface_api_key']);
});
}
diff --git a/refact_webgui/webgui/tab_models_host.py b/refact_webgui/webgui/tab_models_host.py
index 2f1e241b..08dda4b3 100644
--- a/refact_webgui/webgui/tab_models_host.py
+++ b/refact_webgui/webgui/tab_models_host.py
@@ -44,6 +44,7 @@ class TabHostModelsAssign(BaseModel):
anthropic_api_enable: bool = False
groq_api_enable: bool = False
cerebras_api_enable: bool = False
+ gemini_api_enable: bool = False
model_config = ConfigDict(protected_namespaces=()) # avoiding model_ namespace protection
diff --git a/refact_webgui/webgui/tab_settings.py b/refact_webgui/webgui/tab_settings.py
index 3be0a3f5..814c2f0a 100644
--- a/refact_webgui/webgui/tab_settings.py
+++ b/refact_webgui/webgui/tab_settings.py
@@ -24,6 +24,8 @@ class Integrations(BaseModel):
anthropic_api_key: Optional[str] = None
groq_api_key: Optional[str] = None
cerebras_api_key: Optional[str] = None
+ gemini_api_key: Optional[str] = None
+
huggingface_api_key: Optional[str] = None
def __init__(self, models_assigner: ModelAssigner, *args, **kwargs):
diff --git a/setup.py b/setup.py
index fef2a05b..3dcb8525 100644
--- a/setup.py
+++ b/setup.py
@@ -35,7 +35,7 @@ class PyPackage:
"refact_webgui": PyPackage(
requires=["aiohttp", "aiofiles", "cryptography", "fastapi==0.100.0", "giturlparse", "pydantic>=2",
"starlette==0.27.0", "uvicorn", "uvloop", "termcolor", "python-multipart", "more_itertools",
- "scyllapy==1.3.0", "pandas>=2.0.3", "litellm>=1.49.5"],
+ "scyllapy==1.3.0", "pandas>=2.0.3", "litellm>=1.55.3"],
requires_packages=["refact_known_models", "refact_utils"],
data=["webgui/static/*", "webgui/static/components/modals/*",
"webgui/static/dashboards/*", "webgui/static/assets/*", "webgui/static/utils/*",]),
@@ -45,7 +45,7 @@ class PyPackage:
"bitsandbytes", "safetensors", "peft", "triton",
"torchinfo", "mpi4py", "deepspeed>=0.15.3",
"sentence-transformers", "huggingface-hub>=0.26.2",
- "aiohttp", "setproctitle"],
+ "aiohttp", "setproctitle", "google-auth>=2.37.0"],
optional=["ninja", "flash-attn"],
requires_packages=["refact_known_models", "refact_data_pipeline",
"refact_webgui", "refact_utils"],