Skip to content

Commit

Permalink
Merge pull request #71 from weaviate/add-support-for-onnx-runtime
Browse files Browse the repository at this point in the history
Add support for ONNX runtime
  • Loading branch information
antas-marcin authored Dec 1, 2023
2 parents b3c001b + f5bbf90 commit 1ebc017
Show file tree
Hide file tree
Showing 15 changed files with 226 additions and 91 deletions.
8 changes: 8 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,10 @@
__pycache__
.github
.venv
.vscode
cicd
models
nltk_data
smoke_test.py
test_app.py
requirements-test.txt
40 changes: 38 additions & 2 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,54 +20,90 @@ jobs:
include:
- model_name: distilbert-base-uncased
model_tag_name: distilbert-base-uncased
onnx_runtime: false
- model_name: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
model_tag_name: sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2
onnx_runtime: false
- model_name: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
model_tag_name: sentence-transformers-multi-qa-MiniLM-L6-cos-v1
onnx_runtime: false
- model_name: sentence-transformers/multi-qa-mpnet-base-cos-v1
model_tag_name: sentence-transformers-multi-qa-mpnet-base-cos-v1
onnx_runtime: false
- model_name: sentence-transformers/all-mpnet-base-v2
model_tag_name: sentence-transformers-all-mpnet-base-v2
onnx_runtime: false
- model_name: sentence-transformers/all-MiniLM-L12-v2
model_tag_name: sentence-transformers-all-MiniLM-L12-v2
onnx_runtime: false
- model_name: sentence-transformers/paraphrase-multilingual-mpnet-base-v2
model_tag_name: sentence-transformers-paraphrase-multilingual-mpnet-base-v2
onnx_runtime: false
- model_name: sentence-transformers/all-MiniLM-L6-v2
model_tag_name: sentence-transformers-all-MiniLM-L6-v2
onnx_runtime: false
- model_name: sentence-transformers/multi-qa-distilbert-cos-v1
model_tag_name: sentence-transformers-multi-qa-distilbert-cos-v1
onnx_runtime: false
- model_name: sentence-transformers/gtr-t5-base
model_tag_name: sentence-transformers-gtr-t5-base
onnx_runtime: false
- model_name: sentence-transformers/gtr-t5-large
model_tag_name: sentence-transformers-gtr-t5-large
onnx_runtime: false
- model_name: sentence-transformers/sentence-t5-base
model_tag_name: sentence-transformers-sentence-t5-base
onnx_runtime: false
- model_name: vblagoje/dpr-ctx_encoder-single-lfqa-wiki
model_tag_name: vblagoje-dpr-ctx_encoder-single-lfqa-wiki
onnx_runtime: false
- model_name: vblagoje/dpr-question_encoder-single-lfqa-wiki
model_tag_name: vblagoje-dpr-question_encoder-single-lfqa-wiki
onnx_runtime: false
- model_name: facebook/dpr-ctx_encoder-single-nq-base
model_tag_name: facebook-dpr-ctx_encoder-single-nq-base
onnx_runtime: false
- model_name: facebook/dpr-question_encoder-single-nq-base
model_tag_name: facebook-dpr-question_encoder-single-nq-base
onnx_runtime: false
- model_name: google/flan-t5-base
model_tag_name: google-flan-t5-base
onnx_runtime: false
- model_name: google/flan-t5-large
model_tag_name: google-flan-t5-large
onnx_runtime: false
- model_name: biu-nlp/abstract-sim-sentence
model_tag_name: biu-nlp-abstract-sim-sentence
onnx_runtime: false
- model_name: biu-nlp/abstract-sim-query
model_tag_name: biu-nlp-abstract-sim-query
onnx_runtime: false
- model_name: BAAI/bge-small-en
model_tag_name: baai-bge-small-en
onnx_runtime: true
- model_name: BAAI/bge-small-en-v1.5
model_tag_name: baai-bge-small-en-v1.5
onnx_runtime: true
- model_name: BAAI/bge-base-en
model_tag_name: baai-bge-base-en
onnx_runtime: true
- model_name: BAAI/bge-base-en-v1.5
model_tag_name: baai-bge-base-en-v1.5
onnx_runtime: true
- model_name: sentence-transformers/all-MiniLM-L6-v2
model_tag_name: sentence-transformers-all-MiniLM-L6-v2
onnx_runtime: true
env:
LOCAL_REPO: transformers-inference
REMOTE_REPO: semitechnologies/transformers-inference
MODEL_NAME: ${{matrix.model_name}}
MODEL_TAG_NAME: ${{matrix.model_tag_name}}
ONNX_RUNTIME: ${{matrix.onnx_runtime}}
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.11"
cache: 'pip' # caching pip dependencies
- name: Login to Docker Hub
if: ${{ !github.event.pull_request.head.repo.fork }} # no PRs from fork
Expand Down Expand Up @@ -96,7 +132,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.11"
- name: Login to Docker Hub
if: ${{ !github.event.pull_request.head.repo.fork }} # no PRs from fork
uses: docker/login-action@v2
Expand Down
5 changes: 4 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

WORKDIR /app

Expand All @@ -8,7 +8,10 @@ RUN pip install --upgrade pip setuptools
COPY requirements.txt .
RUN pip3 install -r requirements.txt

ARG TARGETARCH
ARG MODEL_NAME
ARG ONNX_RUNTIME
ENV ONNX_CPU=${TARGETARCH}
COPY download.py .
RUN ./download.py

Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ The pre-built models include:
|Bar-Ilan University NLP Lab Models|
|`biu-nlp/abstract-sim-sentence` ([Info](https://huggingface.co/biu-nlp/abstract-sim-sentence))|`semitechnologies/transformers-inference:biu-nlp-abstract-sim-sentence`|
|`biu-nlp/abstract-sim-query` ([Info](https://huggingface.co/biu-nlp/abstract-sim-query))|`semitechnologies/transformers-inference:biu-nlp-abstract-sim-query`|
|ONNX Models|
|`BAAI/bge-small-en` ([Info](https://huggingface.co/BAAI/bge-small-en))|`semitechnologies/transformers-inference:baai-bge-small-en-onnx`|
|`BAAI/bge-small-en-v1.5` ([Info](https://huggingface.co/BAAI/bge-small-en-v1.5))|`semitechnologies/transformers-inference:baai-bge-small-en-v1.5-onnx`|
|`BAAI/bge-base-en` ([Info](https://huggingface.co/BAAI/bge-base-en))|`semitechnologies/transformers-inference:baai-bge-base-en-onnx`|
|`BAAI/bge-base-en-v1.5` ([Info](https://huggingface.co/BAAI/bge-base-en-v1.5))|`semitechnologies/transformers-inference:baai-bge-base-en-v1.5-onnx`|
|`sentence-transformers/all-MiniLM-L6-v2` ([Info](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2))|`semitechnologies/transformers-inference:sentence-transformers-all-MiniLM-L6-v2-onnx`|


The above image names always point to the latest version of the inference
Expand Down
30 changes: 25 additions & 5 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,36 @@ def startup_event():
if transformers_direct_tokenize is not None and transformers_direct_tokenize == "true" or transformers_direct_tokenize == "1":
direct_tokenize = True

model_dir = "./models/model"
def get_model_directory() -> str:
if os.path.exists("./models/model/model_name"):
with open("./models/model/model_name", "r") as f:
if os.path.exists(f"{model_dir}/model_name"):
with open(f"{model_dir}/model_name", "r") as f:
model_name = f.read()
return f"./models/model/{model_name}"
return "./models/model"
return f"{model_dir}/{model_name}"
return model_dir

def get_onnx_runtime() -> bool:
    """Tell whether the bundled model should be served through ONNX runtime.

    Looks for the marker file ``{model_dir}/onnx_runtime``.

    Returns:
        True only when the marker file exists and its content, ignoring
        leading/trailing whitespace, is exactly ``"true"``. False when the
        file is absent or holds anything else.
    """
    marker_path = f"{model_dir}/onnx_runtime"
    if not os.path.exists(marker_path):
        return False
    with open(marker_path, "r") as f:
        # Strip before comparing: a trailing newline left by whatever wrote
        # the marker (e.g. a shell redirect) would otherwise make "true\n"
        # compare unequal and silently switch ONNX mode off.
        return f.read().strip() == "true"

def log_info_about_onnx(onnx_runtime: bool):
    """Log one info line describing the ONNX quantization target.

    Does nothing when ``onnx_runtime`` is falsy. Otherwise reads
    ``{model_dir}/onnx_quantization_info`` verbatim and includes its content
    in the log message; when that file does not exist the placeholder
    ``"missing"`` is logged instead.

    Args:
        onnx_runtime: whether the ONNX vectorizer is in use.
    """
    if not onnx_runtime:
        return

    info_path = f"{model_dir}/onnx_quantization_info"
    if os.path.exists(info_path):
        with open(info_path, "r") as info_file:
            quantization_target = info_file.read()
    else:
        # No info file shipped with the model: fall back to the placeholder.
        quantization_target = "missing"

    logger.info(f"Running ONNX vectorizer with quantized model for {quantization_target}")

onnx_runtime = get_onnx_runtime()
log_info_about_onnx(onnx_runtime)

meta_config = Meta(get_model_directory())
vec = Vectorizer(get_model_directory(), cuda_support, cuda_core, cuda_per_process_memory_fraction,
meta_config.getModelType(), meta_config.get_architecture(), direct_tokenize)
meta_config.get_model_type(), meta_config.get_architecture(),
direct_tokenize, onnx_runtime)


@app.get("/.well-known/live", response_class=Response)
Expand Down
6 changes: 5 additions & 1 deletion cicd/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,9 @@ set -eou pipefail

local_repo=${LOCAL_REPO?Variable LOCAL_REPO is required}
model_name=${MODEL_NAME?Variable MODEL_NAME is required}
onnx_runtime=${ONNX_RUNTIME?Variable ONNX_RUNTIME is required}

docker build --build-arg "MODEL_NAME=$model_name" -t "$local_repo" .
docker build \
--build-arg "MODEL_NAME=$model_name" \
--build-arg "ONNX_RUNTIME=$onnx_runtime" \
-t "$local_repo" .
21 changes: 10 additions & 11 deletions cicd/docker_push.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,11 @@

set -eou pipefail

# Docker push rules
# If on tag (e.g. 1.0.0)
# - any commit is pushed as :<model>-<semver>
# - any commit is pushed as :<model>-latest
# - any commit is pushed as :<model>
git_hash=
remote_repo=${REMOTE_REPO?Variable REMOTE_REPO is required}
model_name=${MODEL_NAME?Variable MODEL_NAME is required}
docker_username=${DOCKER_USERNAME?Variable DOCKER_USERNAME is required}
docker_password=${DOCKER_PASSWORD?Variable DOCKER_PASSWORD is required}
onnx_runtime=${ONNX_RUNTIME?Variable ONNX_RUNTIME is required}
original_model_name=$model_name
git_tag=$GITHUB_REF_NAME

Expand All @@ -20,6 +15,7 @@ function main() {
echo "git ref type is $GITHUB_REF_TYPE"
echo "git ref name is $GITHUB_REF_NAME"
echo "git tag is $git_tag"
echo "onnx_runtime is $onnx_runtime"
push_tag
}

Expand All @@ -31,22 +27,25 @@ function init() {
model_name="$MODEL_TAG_NAME"
fi

git_hash="$(git rev-parse HEAD | head -c 7)"

docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
docker buildx create --use
echo "$docker_password" | docker login -u "$docker_username" --password-stdin
}

function push_tag() {
if [ ! -z "$git_tag" ] && [ "$GITHUB_REF_TYPE" == "tag" ]; then
tag_git="$remote_repo:$model_name-$git_tag"
tag_latest="$remote_repo:$model_name-latest"
tag="$remote_repo:$model_name"
model_name_part=$model_name
if [ "$onnx_runtime" == "true" ]; then
model_name_part="$model_name-onnx"
fi
tag_git="$remote_repo:$model_name_part-$git_tag"
tag_latest="$remote_repo:$model_name_part-latest"
tag="$remote_repo:$model_name_part"

echo "Tag & Push $tag, $tag_latest, $tag_git"
docker buildx build --platform=linux/arm64,linux/amd64 \
--build-arg "MODEL_NAME=$original_model_name" \
--build-arg "ONNX_RUNTIME=$onnx_runtime" \
--push \
--tag "$tag_git" \
--tag "$tag_latest" \
Expand Down
18 changes: 0 additions & 18 deletions cicd/markdown_table_from_api.py

This file was deleted.

24 changes: 0 additions & 24 deletions cicd/travis_yml_to_markdown_table.py

This file was deleted.

2 changes: 1 addition & 1 deletion custom.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

WORKDIR /app

Expand Down
Loading

0 comments on commit 1ebc017

Please sign in to comment.