From e51d71f066e607ced59ce1705e8cf5f0460051c1 Mon Sep 17 00:00:00 2001
From: Marcin Antas
Date: Wed, 3 Apr 2024 13:06:39 +0200
Subject: [PATCH] Adjust SentenceTransformer vectorizer implementation

---
 .github/workflows/main.yaml |  5 ++---
 app.py                      |  9 +++++----
 meta.py                     | 13 ++++++++-----
 requirements.txt            |  2 +-
 vectorizer.py               |  9 +++++----
 5 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index f6b45ae..967137c 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -99,15 +99,14 @@ jobs:
           - model_name: mixedbread-ai/mxbai-embed-large-v1
             model_tag_name: mixedbread-ai-mxbai-embed-large-v1
             onnx_runtime: false
-          - model_name: mixedbread-ai/mxbai-embed-large-v1
-            model_tag_name: mixedbread-ai-mxbai-embed-large-v1
-            onnx_runtime: true
+            use_sentence_transformers_vectorizer: true
     env:
       LOCAL_REPO: transformers-inference
       REMOTE_REPO: semitechnologies/transformers-inference
       MODEL_NAME: ${{matrix.model_name}}
       MODEL_TAG_NAME: ${{matrix.model_tag_name}}
       ONNX_RUNTIME: ${{matrix.onnx_runtime}}
+      USE_SENTENCE_TRANSFORMERS_VECTORIZER: ${{matrix.use_sentence_transformers_vectorizer}}
     steps:
       - uses: actions/checkout@v3
       - uses: actions/setup-python@v4
diff --git a/app.py b/app.py
index bf4968f..752e0c9 100644
--- a/app.py
+++ b/app.py
@@ -48,7 +48,7 @@ def get_model_directory() -> (str, bool):
     if os.path.exists(f"{model_dir}/model_name"):
         with open(f"{model_dir}/model_name", "r") as f:
             model_name = f.read()
-            return f"{model_dir}/{model_name}", True
+            return model_name, True

     # Default model directory is ./models/model
     return model_dir, False
@@ -67,14 +67,15 @@ def log_info_about_onnx(onnx_runtime: bool):
                 onnx_quantization_info = f.read()
         logger.info(f"Running ONNX vectorizer with quantized model for {onnx_quantization_info}")

-model_dir, use_sentence_transformer_vectorizer = get_model_directory()
+model_name, use_sentence_transformer_vectorizer = get_model_directory()
 onnx_runtime = get_onnx_runtime()
 log_info_about_onnx(onnx_runtime)

-meta_config = Meta(model_dir)
+meta_config = Meta(model_dir, model_name, use_sentence_transformer_vectorizer)
 vec = Vectorizer(model_dir, cuda_support, cuda_core, cuda_per_process_memory_fraction,
                  meta_config.get_model_type(), meta_config.get_architecture(),
-                 direct_tokenize, onnx_runtime, use_sentence_transformer_vectorizer)
+                 direct_tokenize, onnx_runtime, use_sentence_transformer_vectorizer,
+                 model_name)


 @app.get("/.well-known/live", response_class=Response)
diff --git a/meta.py b/meta.py
index 1574b03..eb12f5f 100644
--- a/meta.py
+++ b/meta.py
@@ -4,20 +4,23 @@
 class Meta:
     config: AutoConfig

-    def __init__(self, model_path):
-        self.config = AutoConfig.from_pretrained(model_path)
+    def __init__(self, model_path: str, model_name: str, use_sentence_transformer_vectorizer: bool):
+        if use_sentence_transformer_vectorizer:
+            self.config = {"model_name": model_name, "model_type": None}
+        else:
+            self.config = AutoConfig.from_pretrained(model_path).to_dict()

     def get(self):
         return {
-            'model': self.config.to_dict()
+            'model': self.config
         }

     def get_model_type(self):
-        return self.config.to_dict()['model_type']
+        return self.config['model_type']

     def get_architecture(self):
         architecture = None
-        conf = self.config.to_dict()
+        conf = self.config
         if "architectures" in conf:
             architecture = conf["architectures"][0]
         return architecture
diff --git a/requirements.txt b/requirements.txt
index 25db8a0..e168d3d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-transformers==4.38.2
+transformers==4.39.3
 fastapi==0.110.0
 uvicorn==0.27.1
 nltk==3.8.1
diff --git a/vectorizer.py b/vectorizer.py
index 6e403db..b4ee5a6 100644
--- a/vectorizer.py
+++ b/vectorizer.py
@@ -38,13 +38,14 @@ class Vectorizer:
     executor: ThreadPoolExecutor

     def __init__(self, model_path: str, cuda_support: bool, cuda_core: str, cuda_per_process_memory_fraction: float,
-                 model_type: str, architecture: str, direct_tokenize: bool, onnx_runtime: bool, use_sentence_transformer_vectorizer: bool):
+                 model_type: str, architecture: str, direct_tokenize: bool, onnx_runtime: bool,
+                 use_sentence_transformer_vectorizer: bool, model_name: str):
         self.executor = ThreadPoolExecutor()
         if onnx_runtime:
             self.vectorizer = ONNXVectorizer(model_path)
         else:
             if model_type == 't5' or use_sentence_transformer_vectorizer:
-                self.vectorizer = SentenceTransformerVectorizer(model_path, cuda_core)
+                self.vectorizer = SentenceTransformerVectorizer(model_path, model_name, cuda_core)
             else:
                 self.vectorizer = HuggingFaceVectorizer(model_path, cuda_support, cuda_core,
                                                         cuda_per_process_memory_fraction, model_type, architecture, direct_tokenize)
@@ -56,9 +57,9 @@ class SentenceTransformerVectorizer:
     model: SentenceTransformer
     cuda_core: str

-    def __init__(self, model_path: str, cuda_core: str):
+    def __init__(self, model_path: str, model_name: str, cuda_core: str):
         self.cuda_core = cuda_core
-        self.model = SentenceTransformer(model_path, device=self.get_device())
+        self.model = SentenceTransformer(model_name, cache_folder=model_path, device=self.get_device())
         self.model.eval()  # make sure we're in inference mode, not training

     def get_device(self) -> Optional[str]:
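
Note: the net effect of this patch is that SentenceTransformerVectorizer no longer loads a checkpoint from a local path; it resolves the model by name through sentence-transformers and uses the model directory only as a download cache. Below is a minimal sketch of that loading path, assuming the sentence-transformers package; the model name, cache directory, and device logic are illustrative stand-ins, not values taken from this patch:

    from typing import Optional

    from sentence_transformers import SentenceTransformer

    MODEL_NAME = "mixedbread-ai/mxbai-embed-large-v1"  # example model from the CI matrix
    CACHE_DIR = "./models/model"                       # hypothetical cache location

    def pick_device(cuda_core: str) -> Optional[str]:
        # Rough stand-in for get_device(); this patch does not show its body.
        return cuda_core if cuda_core else None

    # After this patch the checkpoint is resolved by name and cached under
    # cache_folder, instead of being read directly from model_path.
    model = SentenceTransformer(MODEL_NAME, cache_folder=CACHE_DIR,
                                device=pick_device(""))
    model.eval()  # inference mode, matching the patched constructor

    vectors = model.encode(["hello world"], convert_to_numpy=True)
    print(vectors.shape)  # e.g. (1, 1024) for mxbai-embed-large-v1

This also explains the Meta change: when the vectorizer is driven by a model name rather than a local checkpoint, there is no AutoConfig on disk to read, so a plain dict with model_name and a null model_type stands in for it.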