Merge pull request #78 from weaviate/feature/add-mxbai-embed-large-v1
Add support for mixedbread-ai/mxbai-embed-large-v1
antas-marcin authored Apr 3, 2024
2 parents 589e07e + ed938ac commit 7381dd5
Showing 8 changed files with 32 additions and 17 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/main.yaml
@@ -96,12 +96,20 @@ jobs:
           - model_name: sentence-transformers/all-MiniLM-L6-v2
             model_tag_name: sentence-transformers-all-MiniLM-L6-v2
             onnx_runtime: true
+          - model_name: mixedbread-ai/mxbai-embed-large-v1
+            model_tag_name: mixedbread-ai-mxbai-embed-large-v1
+            onnx_runtime: false
+            use_sentence_transformers_vectorizer: true
+          - model_name: mixedbread-ai/mxbai-embed-large-v1
+            model_tag_name: mixedbread-ai-mxbai-embed-large-v1
+            onnx_runtime: true
     env:
       LOCAL_REPO: transformers-inference
       REMOTE_REPO: semitechnologies/transformers-inference
       MODEL_NAME: ${{matrix.model_name}}
       MODEL_TAG_NAME: ${{matrix.model_tag_name}}
       ONNX_RUNTIME: ${{matrix.onnx_runtime}}
+      USE_SENTENCE_TRANSFORMERS_VECTORIZER: ${{matrix.use_sentence_transformers_vectorizer}}
     steps:
       - uses: actions/checkout@v3
       - uses: actions/setup-python@v4
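For context, a minimal sketch of how the matrix values above typically surface inside the build as environment variables — the exact consuming script is an assumption, but the variable names match the `env:` block:

```python
# Hypothetical reader for the matrix-driven build flags; the variable names
# match the env block above, the defaults are assumptions.
import os

model_name = os.getenv("MODEL_NAME", "")
onnx_runtime = os.getenv("ONNX_RUNTIME", "false").lower() == "true"
use_st_vectorizer = os.getenv("USE_SENTENCE_TRANSFORMERS_VECTORIZER", "false").lower() == "true"

print(f"building {model_name}: onnx={onnx_runtime}, sentence-transformers={use_st_vectorizer}")
```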
2 changes: 2 additions & 0 deletions README.md
@@ -41,6 +41,7 @@ The pre-built models include:
 |`google/flan-t5-large` ([Info](https://huggingface.co/google/flan-t5-large))|`semitechnologies/transformers-inference:sentence-transformers-gtr-t5-large`|
 |`BAAI/bge-small-en-v1.5` ([Info](https://huggingface.co/BAAI/bge-small-en-v1.5))|`semitechnologies/transformers-inference:baai-bge-small-en-v1.5`|
 |`BAAI/bge-base-en-v1.5` ([Info](https://huggingface.co/BAAI/bge-base-en-v1.5))|`semitechnologies/transformers-inference:baai-bge-base-en-v1.5`|
+|`mixedbread-ai/mxbai-embed-large-v1` ([Info](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1))|`semitechnologies/transformers-inference:mixedbread-ai-mxbai-embed-large-v1`|
 |DPR Models|
 |`facebook/dpr-ctx_encoder-single-nq-base` ([Info](https://huggingface.co/facebook/dpr-ctx_encoder-single-nq-base))|`semitechnologies/transformers-inference:facebook-dpr-ctx_encoder-single-nq-base`|
 |`facebook/dpr-question_encoder-single-nq-base` ([Info](https://huggingface.co/facebook/dpr-question_encoder-single-nq-base))|`semitechnologies/transformers-inference:facebook-dpr-question_encoder-single-nq-base`|
@@ -54,6 +55,7 @@ The pre-built models include:
 |`BAAI/bge-base-en-v1.5` ([Info](https://huggingface.co/BAAI/bge-base-en-v1.5))|`semitechnologies/transformers-inference:baai-bge-base-en-v1.5-onnx`|
 |`BAAI/bge-m3` ([Info](https://huggingface.co/BAAI/bge-m3))|`semitechnologies/transformers-inference:baai-bge-m3-onnx`|
 |`sentence-transformers/all-MiniLM-L6-v2` ([Info](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2))|`semitechnologies/transformers-inference:sentence-transformers-all-MiniLM-L6-v2-onnx`|
+|`mixedbread-ai/mxbai-embed-large-v1` ([Info](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1))|`semitechnologies/transformers-inference:mixedbread-ai-mxbai-embed-large-v1-onnx`|
 
 
 The above image names always point to the latest version of the inference
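As a quick smoke test, a running container built from the new image can be probed over HTTP — a minimal sketch, assuming the container maps port 8080 and using the liveness route defined in app.py below:

```python
# Probe a locally running inference container, e.g. started with:
#   docker run -p 8080:8080 semitechnologies/transformers-inference:mixedbread-ai-mxbai-embed-large-v1
# The /.well-known/live route is defined in app.py; the port mapping is an assumption.
import requests

resp = requests.get("http://localhost:8080/.well-known/live")
print("live" if resp.ok else f"not live (HTTP {resp.status_code})")
```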
9 changes: 5 additions & 4 deletions app.py
@@ -48,7 +48,7 @@ def get_model_directory() -> (str, bool):
     if os.path.exists(f"{model_dir}/model_name"):
         with open(f"{model_dir}/model_name", "r") as f:
             model_name = f.read()
-            return f"{model_dir}/{model_name}", True
+            return model_name, True
     # Default model directory is ./models/model
     return model_dir, False

@@ -67,14 +67,15 @@ def log_info_about_onnx(onnx_runtime: bool):
             onnx_quantization_info = f.read()
             logger.info(f"Running ONNX vectorizer with quantized model for {onnx_quantization_info}")
 
-model_dir, use_sentence_transformer_vectorizer = get_model_directory()
+model_name, use_sentence_transformer_vectorizer = get_model_directory()
 onnx_runtime = get_onnx_runtime()
 log_info_about_onnx(onnx_runtime)
 
-meta_config = Meta(model_dir)
+meta_config = Meta(model_dir, model_name, use_sentence_transformer_vectorizer)
 vec = Vectorizer(model_dir, cuda_support, cuda_core, cuda_per_process_memory_fraction,
                  meta_config.get_model_type(), meta_config.get_architecture(),
-                 direct_tokenize, onnx_runtime, use_sentence_transformer_vectorizer)
+                 direct_tokenize, onnx_runtime, use_sentence_transformer_vectorizer,
+                 model_name)
 
 
 @app.get("/.well-known/live", response_class=Response)
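The net effect of these two hunks: startup now separates the model identifier from the on-disk cache. A self-contained sketch of the changed lookup, with the module-level `model_dir` reconstructed from the comment in the hunk:

```python
# Sketch of the post-change lookup (model_dir is taken from the hunk's comment;
# the rest mirrors the diff).
import os

model_dir = "./models/model"

def get_model_directory() -> (str, bool):
    # download.py writes the original model id into this marker file for
    # sentence-transformers models; its presence flips the vectorizer mode.
    if os.path.exists(f"{model_dir}/model_name"):
        with open(f"{model_dir}/model_name", "r") as f:
            return f.read(), True
    # Default: a plain transformers checkpoint stored directly in model_dir.
    return model_dir, False
```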
2 changes: 1 addition & 1 deletion download.py
@@ -89,7 +89,7 @@ def download_model(model_name: str, model_dir: str):
     if (model_type is not None and model_type == "t5") or use_sentence_transformers_vectorizer.lower() == "true":
         SentenceTransformer(model_name, cache_folder=model_dir)
         with open(f"{model_dir}/model_name", "w") as f:
-            f.write(model_name.replace("/", "_"))
+            f.write(model_name)
     else:
         if config.architectures and not force_automodel:
             print(f"Using class {config.architectures[0]} to load model weights")
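Note the marker file now stores the unmodified model id (slashes included), which is what `SentenceTransformer` resolves by name at startup. A minimal sketch of this branch in isolation:

```python
# Isolated sketch of the sentence-transformers download branch after this
# change; the surrounding config handling in download.py is elided.
from sentence_transformers import SentenceTransformer

def download_st_model(model_name: str, model_dir: str) -> None:
    # Cache the weights inside the image at build time.
    SentenceTransformer(model_name, cache_folder=model_dir)
    # Record the unmodified id, e.g. "mixedbread-ai/mxbai-embed-large-v1",
    # so app.py can reload the model by name.
    with open(f"{model_dir}/model_name", "w") as f:
        f.write(model_name)
```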
13 changes: 8 additions & 5 deletions meta.py
@@ -4,20 +4,23 @@
 class Meta:
     config: AutoConfig
 
-    def __init__(self, model_path):
-        self.config = AutoConfig.from_pretrained(model_path)
+    def __init__(self, model_path: str, model_name: str, use_sentence_transformer_vectorizer: bool):
+        if use_sentence_transformer_vectorizer:
+            self.config = {"model_name": model_name, "model_type": None}
+        else:
+            self.config = AutoConfig.from_pretrained(model_path).to_dict()
 
     def get(self):
         return {
-            'model': self.config.to_dict()
+            'model': self.config
         }
 
     def get_model_type(self):
-        return self.config.to_dict()['model_type']
+        return self.config['model_type']
 
     def get_architecture(self):
         architecture = None
-        conf = self.config.to_dict()
+        conf = self.config
         if "architectures" in conf:
             architecture = conf["architectures"][0]
         return architecture
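Since `config` is now a plain dict in both modes, the accessors drop their `to_dict()` calls; in sentence-transformers mode there is no `AutoConfig` lookup at all. A usage sketch (the local path is an assumption):

```python
# Usage sketch for the new sentence-transformers Meta mode.
from meta import Meta

st_meta = Meta("./models/model", "mixedbread-ai/mxbai-embed-large-v1", True)
print(st_meta.get())               # {'model': {'model_name': '...', 'model_type': None}}
print(st_meta.get_model_type())    # None
print(st_meta.get_architecture())  # None ("architectures" key is absent)
```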
2 changes: 1 addition & 1 deletion requirements-test.txt
@@ -5,7 +5,7 @@ uvicorn==0.27.1
 nltk==3.8.1
 torch==2.0.1
 sentencepiece==0.2.0
-sentence-transformers==2.2.2
+sentence-transformers==2.6.1
 optimum==1.17.1
 onnxruntime==1.17.1
 onnx==1.15.0
4 changes: 2 additions & 2 deletions requirements.txt
@@ -1,10 +1,10 @@
-transformers==4.38.2
+transformers==4.39.3
 fastapi==0.110.0
 uvicorn==0.27.1
 nltk==3.8.1
 torch==2.0.1
 sentencepiece==0.2.0
-sentence-transformers==2.2.2
+sentence-transformers==2.6.1
 optimum==1.17.1
 onnxruntime==1.17.1
 onnx==1.15.0
9 changes: 5 additions & 4 deletions vectorizer.py
@@ -38,13 +38,14 @@ class Vectorizer:
     executor: ThreadPoolExecutor
 
     def __init__(self, model_path: str, cuda_support: bool, cuda_core: str, cuda_per_process_memory_fraction: float,
-                 model_type: str, architecture: str, direct_tokenize: bool, onnx_runtime: bool, use_sentence_transformer_vectorizer: bool):
+                 model_type: str, architecture: str, direct_tokenize: bool, onnx_runtime: bool,
+                 use_sentence_transformer_vectorizer: bool, model_name: str):
         self.executor = ThreadPoolExecutor()
         if onnx_runtime:
             self.vectorizer = ONNXVectorizer(model_path)
         else:
             if model_type == 't5' or use_sentence_transformer_vectorizer:
-                self.vectorizer = SentenceTransformerVectorizer(model_path, cuda_core)
+                self.vectorizer = SentenceTransformerVectorizer(model_path, model_name, cuda_core)
             else:
                 self.vectorizer = HuggingFaceVectorizer(model_path, cuda_support, cuda_core, cuda_per_process_memory_fraction, model_type, architecture, direct_tokenize)
@@ -56,9 +57,9 @@ class SentenceTransformerVectorizer:
     model: SentenceTransformer
     cuda_core: str
 
-    def __init__(self, model_path: str, cuda_core: str):
+    def __init__(self, model_path: str, model_name: str, cuda_core: str):
         self.cuda_core = cuda_core
-        self.model = SentenceTransformer(model_path, device=self.get_device())
+        self.model = SentenceTransformer(model_name, cache_folder=model_path, device=self.get_device())
         self.model.eval()  # make sure we're in inference mode, not training
 
     def get_device(self) -> Optional[str]:
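Loading by `model_name` with `cache_folder=model_path` lets sentence-transformers resolve the Hugging Face id against the weights already baked into the image. A stand-alone sketch of the same pattern (device and input text are assumptions):

```python
# Stand-alone sketch of the loading pattern used by SentenceTransformerVectorizer.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer(
    "mixedbread-ai/mxbai-embed-large-v1",  # resolved by id, as in the diff
    cache_folder="./models/model",         # reuse the weights cached at build time
    device="cpu",
)
model.eval()  # inference mode, matching the vectorizer
embedding = model.encode("Weaviate is a vector database.")
print(embedding.shape)  # expected (1024,), the model's embedding width
```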
