From de0074df94517271a1cb5931d3407c2ff2dd4a25 Mon Sep 17 00:00:00 2001
From: Charles Pierse
Date: Thu, 20 Jun 2024 20:43:25 +0100
Subject: [PATCH 1/2] Pass trust remote code to from_pretrained instances

---
 config.py     |  3 +++
 meta.py       | 19 +++++++++++++------
 vectorizer.py | 38 ++++++++++++++++++--------------------
 3 files changed, 34 insertions(+), 26 deletions(-)
 create mode 100644 config.py

diff --git a/config.py b/config.py
new file mode 100644
index 0000000..f7d8dd6
--- /dev/null
+++ b/config.py
@@ -0,0 +1,3 @@
+import os
+
+TRUST_REMOTE_CODE = os.getenv("TRUST_REMOTE_CODE", "false").lower() in ("1", "true")
diff --git a/meta.py b/meta.py
index eb12f5f..1998866 100644
--- a/meta.py
+++ b/meta.py
@@ -1,22 +1,29 @@
 from transformers import AutoConfig
 
+from config import TRUST_REMOTE_CODE
+
 
 class Meta:
     config: AutoConfig
 
-    def __init__(self, model_path: str, model_name: str, use_sentence_transformer_vectorizer: bool):
+    def __init__(
+        self,
+        model_path: str,
+        model_name: str,
+        use_sentence_transformer_vectorizer: bool,
+    ):
         if use_sentence_transformer_vectorizer:
             self.config = {"model_name": model_name, "model_type": None}
         else:
-            self.config = AutoConfig.from_pretrained(model_path).to_dict()
+            self.config = AutoConfig.from_pretrained(
+                model_path, trust_remote_code=TRUST_REMOTE_CODE
+            ).to_dict()
 
     def get(self):
-        return {
-            'model': self.config
-        }
+        return {"model": self.config}
 
     def get_model_type(self):
-        return self.config['model_type']
+        return self.config["model_type"]
 
     def get_architecture(self):
         architecture = None
diff --git a/vectorizer.py b/vectorizer.py
index b4ee5a6..75e0a74 100644
--- a/vectorizer.py
+++ b/vectorizer.py
@@ -1,24 +1,21 @@
 import asyncio
-from concurrent.futures import ThreadPoolExecutor
 import math
+from concurrent.futures import ThreadPoolExecutor
+from pathlib import Path
 from typing import Optional
+
+import nltk
 import torch
 import torch.nn.functional as F
-from pathlib import Path
-import nltk
 from nltk.tokenize import sent_tokenize
+from optimum.onnxruntime import ORTModelForFeatureExtraction
 from pydantic import BaseModel
-from transformers import (
-    AutoModel,
-    AutoTokenizer,
-    T5ForConditionalGeneration,
-    T5Tokenizer,
-    DPRContextEncoder,
-    DPRQuestionEncoder,
-)
 from sentence_transformers import SentenceTransformer
-from optimum.onnxruntime import ORTModelForFeatureExtraction
+from transformers import (AutoModel, AutoTokenizer, DPRContextEncoder,
+                          DPRQuestionEncoder, T5ForConditionalGeneration,
+                          T5Tokenizer)
 
+from config import TRUST_REMOTE_CODE
 
 # limit transformer batch size to limit parallel inference, otherwise we run
 # into memory problems
@@ -78,8 +75,9 @@ class ONNXVectorizer:
 
     def __init__(self, model_path) -> None:
         onnx_path = Path(model_path)
-        self.model = ORTModelForFeatureExtraction.from_pretrained(onnx_path, file_name="model_quantized.onnx")
-        self.tokenizer = AutoTokenizer.from_pretrained(onnx_path)
+        self.model = ORTModelForFeatureExtraction.from_pretrained(onnx_path, file_name="model_quantized.onnx",
+                                                                  trust_remote_code=TRUST_REMOTE_CODE)
+        self.tokenizer = AutoTokenizer.from_pretrained(onnx_path, trust_remote_code=TRUST_REMOTE_CODE)
 
     def mean_pooling(self, model_output, attention_mask):
         token_embeddings = model_output[0] #First element of model_output contains all token embeddings
@@ -179,11 +177,11 @@ def __init__(self, cuda_support: bool, cuda_core: str):
         self.cuda_core = cuda_core
 
     def create_tokenizer(self, model_path):
-        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=TRUST_REMOTE_CODE)
         return self.tokenizer
 
     def create_model(self, model_path):
-        self.model = AutoModel.from_pretrained(model_path)
+        self.model = AutoModel.from_pretrained(model_path, trust_remote_code=TRUST_REMOTE_CODE)
         return self.model
 
     def get_embeddings(self, batch_results):
@@ -236,9 +234,9 @@ def __init__(self, architecture: str, cuda_support: bool, cuda_core: str):
 
     def create_model(self, model_path):
         if self.architecture == "DPRQuestionEncoder":
-            self.model = DPRQuestionEncoder.from_pretrained(model_path)
+            self.model = DPRQuestionEncoder.from_pretrained(model_path, trust_remote_code=TRUST_REMOTE_CODE)
         else:
-            self.model = DPRContextEncoder.from_pretrained(model_path)
+            self.model = DPRContextEncoder.from_pretrained(model_path, trust_remote_code=TRUST_REMOTE_CODE)
         return self.model
 
     def get_batch_results(self, tokens, text):
@@ -259,11 +257,11 @@ def __init__(self, cuda_support: bool, cuda_core: str):
         self.cuda_core = cuda_core
 
     def create_model(self, model_path):
-        self.model = T5ForConditionalGeneration.from_pretrained(model_path)
+        self.model = T5ForConditionalGeneration.from_pretrained(model_path, trust_remote_code=TRUST_REMOTE_CODE)
         return self.model
 
     def create_tokenizer(self, model_path):
-        self.tokenizer = T5Tokenizer.from_pretrained(model_path)
+        self.tokenizer = T5Tokenizer.from_pretrained(model_path, trust_remote_code=TRUST_REMOTE_CODE)
        return self.tokenizer
 
     def get_embeddings(self, batch_results):

From 7e237ca847ce1373693a75d7b5eaeb1b2042c262 Mon Sep 17 00:00:00 2001
From: Charles Pierse
Date: Fri, 21 Jun 2024 11:30:26 +0100
Subject: [PATCH 2/2] Pin numpy to be <2.0

---
 requirements-test.txt | 1 +
 requirements.txt      | 1 +
 2 files changed, 2 insertions(+)

diff --git a/requirements-test.txt b/requirements-test.txt
index fa104cd..08e9d33 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -9,4 +9,5 @@ sentence-transformers==2.6.1
 optimum==1.17.1
 onnxruntime==1.17.1
 onnx==1.15.0
+numpy==1.26.4
 pytest
diff --git a/requirements.txt b/requirements.txt
index e168d3d..037bf4f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ sentence-transformers==2.6.1
 optimum==1.17.1
 onnxruntime==1.17.1
 onnx==1.15.0
+numpy==1.26.4
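
A minimal usage sketch (not part of the patches above) of how the new flag is
consumed. The model path below is a placeholder, and TRUST_REMOTE_CODE has to
be set in the environment before config.py is first imported, because the
value is read once at import time:

    import os

    os.environ["TRUST_REMOTE_CODE"] = "true"  # opt in before importing config

    from config import TRUST_REMOTE_CODE
    from transformers import AutoModel, AutoTokenizer

    # Placeholder path; the real location is deployment-specific.
    model_path = "./models/model"
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=TRUST_REMOTE_CODE)
    model = AutoModel.from_pretrained(model_path, trust_remote_code=TRUST_REMOTE_CODE)

Leaving the variable unset (or setting it to anything other than "1" or
"true") keeps trust_remote_code=False, so models that ship custom modeling
code on the Hub will refuse to load rather than execute that code silently.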