Skip to content

Commit

Permalink
Vectorize asynchronously with ThreadPoolExecutor (#56)
Browse files: browse the repository at this point in the history
* vectorize asynchronously with ThreadPoolExecutor

* unbounded executor workers
  • Loading branch information
alexcannan authored Apr 21, 2023
1 parent 75fb7d7 commit 64b36bd
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 3 deletions.
2 changes: 1 addition & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def startup_event():

# Shared handler for the "/.well-known/live" and "/.well-known/ready" routes;
# its only action is to set the response status to 204 No Content.
# NOTE(review): the two signature lines below appear to be the removed (sync)
# and added (async) sides of this diff hunk — confirm against the repository.
@app.get("/.well-known/live", response_class=Response)
@app.get("/.well-known/ready", response_class=Response)
def live_and_ready(response: Response):
async def live_and_ready(response: Response):
response.status_code = status.HTTP_204_NO_CONTENT


Expand Down
12 changes: 10 additions & 2 deletions vectorizer.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import asyncio
from concurrent.futures import ThreadPoolExecutor
import math
from typing import Optional
import torch
Expand Down Expand Up @@ -33,6 +35,7 @@ class Vectorizer:
cuda_core: str
model_type: str
direct_tokenize: bool
executor: ThreadPoolExecutor

def __init__(self, model_path: str, cuda_support: bool, cuda_core: str, cuda_per_process_memory_fraction: float, model_type: str, architecture: str, direct_tokenize: bool):
self.cuda = cuda_support
Expand All @@ -52,8 +55,10 @@ def __init__(self, model_path: str, cuda_support: bool, cuda_core: str, cuda_per

self.tokenizer = self.model_delegate.create_tokenizer(model_path)

self.executor = ThreadPoolExecutor()

def tokenize(self, text:str):
# Run self.tokenizer on `text` with padding and truncation enabled, a
# max_length of 500, and special tokens added, and return its result.
# return_tensors="pt" presumably requests PyTorch tensors — confirm against
# the tokenizer created by the model delegate.
# NOTE(review): the repeated `return` line looks like the old and new sides
# of a whitespace-only change in this diff hunk.
return self.tokenizer(text, padding=True, truncation=True, max_length=500,
return self.tokenizer(text, padding=True, truncation=True, max_length=500,
add_special_tokens = True, return_tensors="pt")

def get_embeddings(self, batch_results):
Expand All @@ -65,7 +70,7 @@ def get_batch_results(self, tokens, text):
def pool_embedding(self, batch_results, tokens, config):
    """Pool ``batch_results`` by delegating to ``self.model_delegate``.

    All three arguments are forwarded unchanged, and the delegate's return
    value is handed back as-is.
    """
    delegate = self.model_delegate
    pooled = delegate.pool_embedding(batch_results, tokens, config)
    return pooled

async def vectorize(self, text: str, config: VectorInputConfig):
def _vectorize(self, text: str, config: VectorInputConfig):
with torch.no_grad():
if self.direct_tokenize:
# create embeddings without tokenizing text
Expand All @@ -92,6 +97,9 @@ async def vectorize(self, text: str, config: VectorInputConfig):
batch_sum_vectors += self.pool_embedding(batch_results, tokens, config)
return batch_sum_vectors.detach() / num_sentences

async def vectorize(self, text: str, config: VectorInputConfig):
    """Run the synchronous ``_vectorize`` on ``self.executor`` and await it.

    Args:
        text: Input forwarded unchanged to ``_vectorize``.
        config: Options forwarded unchanged to ``_vectorize``.

    Returns:
        Whatever ``_vectorize`` returns for the given arguments; an
        exception raised by ``_vectorize`` propagates to the awaiting caller.
    """
    # run_in_executor submits to the executor and wraps the resulting
    # concurrent future for the running loop in a single call.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        self.executor, self._vectorize, text, config
    )


class HFModel:

Expand Down

0 comments on commit 64b36bd

Please sign in to comment.