weaviate · StefanBogdan · Apr 21, 2023 · Apr 21, 2023 · Apr 21, 2023
diff --git a/app.py b/app.py
@@ -50,7 +50,7 @@ def startup_event():
 
 @app.get("/.well-known/live", response_class=Response)
 @app.get("/.well-known/ready", response_class=Response)
-def live_and_ready(response: Response):
+async def live_and_ready(response: Response):  # one handler for both /.well-known/live and /.well-known/ready; sets status 204 No Content
     response.status_code = status.HTTP_204_NO_CONTENT
 
 

diff --git a/vectorizer.py b/vectorizer.py
@@ -1,3 +1,5 @@
+import asyncio
+from concurrent.futures import ThreadPoolExecutor
 import math
 from typing import Optional
 import torch
@@ -33,6 +35,7 @@ class Vectorizer:
     cuda_core: str
     model_type: str
     direct_tokenize: bool
+    executor: ThreadPoolExecutor
 
     def __init__(self, model_path: str, cuda_support: bool, cuda_core: str, cuda_per_process_memory_fraction: float, model_type: str, architecture: str, direct_tokenize: bool):
         self.cuda = cuda_support
@@ -52,8 +55,10 @@ def __init__(self, model_path: str, cuda_support: bool, cuda_core: str, cuda_per
 
         self.tokenizer = self.model_delegate.create_tokenizer(model_path)
 
+        self.executor = ThreadPoolExecutor()
+
     def tokenize(self, text:str):
-        return self.tokenizer(text, padding=True, truncation=True, max_length=500, 
+        return self.tokenizer(text, padding=True, truncation=True, max_length=500,  # NOTE(review): presumably truncates input to 500 tokens - confirm against the tokenizer
                 add_special_tokens = True, return_tensors="pt")
 
     def get_embeddings(self, batch_results):
@@ -65,7 +70,7 @@ def get_batch_results(self, tokens, text):
     def pool_embedding(self, batch_results, tokens, config):
         return self.model_delegate.pool_embedding(batch_results, tokens, config)
 
-    async def vectorize(self, text: str, config: VectorInputConfig):
+    def _vectorize(self, text: str, config: VectorInputConfig):
         with torch.no_grad():
             if self.direct_tokenize:
                 # create embeddings without tokenizing text
@@ -92,6 +97,9 @@ async def vectorize(self, text: str, config: VectorInputConfig):
                     batch_sum_vectors += self.pool_embedding(batch_results, tokens, config)
                 return batch_sum_vectors.detach() / num_sentences
 
+    async def vectorize(self, text: str, config: VectorInputConfig):  # awaitable front for _vectorize, which runs on self.executor
+        return await asyncio.get_running_loop().run_in_executor(self.executor, self._vectorize, text, config)
+
 
 class HFModel: