Skip to content

Commit

Permalink
Adjust SentenceTransformer vectorizer implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
antas-marcin committed Apr 3, 2024
1 parent 38d7c04 commit e51d71f
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 17 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,14 @@ jobs:
- model_name: mixedbread-ai/mxbai-embed-large-v1
model_tag_name: mixedbread-ai-mxbai-embed-large-v1
onnx_runtime: false
- model_name: mixedbread-ai/mxbai-embed-large-v1
model_tag_name: mixedbread-ai-mxbai-embed-large-v1
onnx_runtime: true
use_sentence_transformers_vectorizer: true
env:
LOCAL_REPO: transformers-inference
REMOTE_REPO: semitechnologies/transformers-inference
MODEL_NAME: ${{matrix.model_name}}
MODEL_TAG_NAME: ${{matrix.model_tag_name}}
ONNX_RUNTIME: ${{matrix.onnx_runtime}}
USE_SENTENCE_TRANSFORMERS_VECTORIZER: ${{matrix.use_sentence_transformers_vectorizer}}
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
Expand Down
9 changes: 5 additions & 4 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def get_model_directory() -> (str, bool):
if os.path.exists(f"{model_dir}/model_name"):
with open(f"{model_dir}/model_name", "r") as f:
model_name = f.read()
return f"{model_dir}/{model_name}", True
return model_name, True
# Default model directory is ./models/model
return model_dir, False

Expand All @@ -67,14 +67,15 @@ def log_info_about_onnx(onnx_runtime: bool):
onnx_quantization_info = f.read()
logger.info(f"Running ONNX vectorizer with quantized model for {onnx_quantization_info}")

model_dir, use_sentence_transformer_vectorizer = get_model_directory()
model_name, use_sentence_transformer_vectorizer = get_model_directory()
onnx_runtime = get_onnx_runtime()
log_info_about_onnx(onnx_runtime)

meta_config = Meta(model_dir)
meta_config = Meta(model_dir, model_name, use_sentence_transformer_vectorizer)
vec = Vectorizer(model_dir, cuda_support, cuda_core, cuda_per_process_memory_fraction,
meta_config.get_model_type(), meta_config.get_architecture(),
direct_tokenize, onnx_runtime, use_sentence_transformer_vectorizer)
direct_tokenize, onnx_runtime, use_sentence_transformer_vectorizer,
model_name)


@app.get("/.well-known/live", response_class=Response)
Expand Down
13 changes: 8 additions & 5 deletions meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,23 @@
class Meta:
    """Expose model configuration metadata as a plain dict.

    For sentence-transformers models there is no Hugging Face AutoConfig on
    disk, so a minimal stub config is synthesized instead; otherwise the
    config is loaded via ``AutoConfig.from_pretrained`` and converted to a
    dict so both paths share one representation.
    """

    # Always a plain dict after __init__ (either the synthesized stub or
    # AutoConfig.from_pretrained(...).to_dict()).
    config: dict

    def __init__(self, model_path: str, model_name: str, use_sentence_transformer_vectorizer: bool):
        if use_sentence_transformer_vectorizer:
            # No AutoConfig available for sentence-transformers models;
            # provide the keys the getters below rely on.
            self.config = {"model_name": model_name, "model_type": None}
        else:
            self.config = AutoConfig.from_pretrained(model_path).to_dict()

    def get(self):
        """Return the model configuration wrapped under the 'model' key."""
        return {
            'model': self.config
        }

    def get_model_type(self):
        """Return the configured model type (None for the stub config)."""
        return self.config['model_type']

    def get_architecture(self):
        """Return the first entry of 'architectures', or None if absent."""
        architecture = None
        if "architectures" in self.config:
            architecture = self.config["architectures"][0]
        return architecture
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
transformers==4.38.2
transformers==4.39.3
fastapi==0.110.0
uvicorn==0.27.1
nltk==3.8.1
Expand Down
9 changes: 5 additions & 4 deletions vectorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,14 @@ class Vectorizer:
executor: ThreadPoolExecutor

def __init__(self, model_path: str, cuda_support: bool, cuda_core: str, cuda_per_process_memory_fraction: float,
model_type: str, architecture: str, direct_tokenize: bool, onnx_runtime: bool, use_sentence_transformer_vectorizer: bool):
model_type: str, architecture: str, direct_tokenize: bool, onnx_runtime: bool,
use_sentence_transformer_vectorizer: bool, model_name: str):
self.executor = ThreadPoolExecutor()
if onnx_runtime:
self.vectorizer = ONNXVectorizer(model_path)
else:
if model_type == 't5' or use_sentence_transformer_vectorizer:
self.vectorizer = SentenceTransformerVectorizer(model_path, cuda_core)
self.vectorizer = SentenceTransformerVectorizer(model_path, model_name, cuda_core)
else:
self.vectorizer = HuggingFaceVectorizer(model_path, cuda_support, cuda_core, cuda_per_process_memory_fraction, model_type, architecture, direct_tokenize)

Expand All @@ -56,9 +57,9 @@ class SentenceTransformerVectorizer:
model: SentenceTransformer
cuda_core: str

def __init__(self, model_path: str, cuda_core: str):
def __init__(self, model_path: str, model_name: str, cuda_core: str):
self.cuda_core = cuda_core
self.model = SentenceTransformer(model_path, device=self.get_device())
self.model = SentenceTransformer(model_name, cache_folder=model_path, device=self.get_device())
self.model.eval() # make sure we're in inference mode, not training

def get_device(self) -> Optional[str]:
Expand Down

0 comments on commit e51d71f

Please sign in to comment.