Skip to content

Commit

Permalink
[Feature Branch][LLM Testing] Full Testing Harness for LLMs (#1216)
Browse files Browse the repository at this point in the history
* initial commit

* initial commit

* [Feature Branch][LLM Testing] Create GroundTruthSource objects (#1219)

* initial commit

* finish creation of helper objects

* Update tests/conftest.py

* small refactor

* [Feature Branch][LLM Testing] LLM Testing Suite (#1227)

* Update README.md

* Update src/deepsparse/yolov8/README.md

* Update text_generation.py

* quality

* readability

* all tests passing

* added some full kv cache tests

* initial commit

* ready for review

* Delete tests/deepsparse/transformers/pipelines/proposal_text_generation_tests.md

* fix tests

* Dipika's comments plus adjusting the script to renamed variables

* remove ORT ground truth

* add OPT tests

* rebase and disable tests in GHA

* quality
  • Loading branch information
dbogunowicz authored Sep 13, 2023
1 parent 1439359 commit 907ea83
Show file tree
Hide file tree
Showing 3 changed files with 519 additions and 229 deletions.
26 changes: 21 additions & 5 deletions src/deepsparse/transformers/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
# File names requested from a model's deployment files when resolving
# the HuggingFace config and tokenizer locations.
_MODEL_DIR_CONFIG_NAME = "config.json"
_MODEL_DIR_TOKENIZER_NAME = "tokenizer.json"
_MODEL_DIR_TOKENIZER_CONFIG_NAME = "tokenizer_config.json"
# Additional tokenizer files fetched when "tokenizer.json" is not present
# in the deployment; the lookup code treats that case as an OPT model.
_OPT_TOKENIZER_FILES = ["special_tokens_map.json", "vocab.json", "merges.txt"]


def get_onnx_path(model_path: str) -> str:
Expand Down Expand Up @@ -122,14 +123,29 @@ def get_hugging_face_configs(model_path: str) -> Tuple[str, str]:
config_path = _get_file_parent(
zoo_model.deployment.default.get_file(_MODEL_DIR_CONFIG_NAME).path
)
tokenizer_path = _get_file_parent(
zoo_model.deployment.default.get_file(_MODEL_DIR_TOKENIZER_NAME).path
tokenizer_file = zoo_model.deployment.default.get_file(
_MODEL_DIR_TOKENIZER_NAME
)
tokenizer_config_path = zoo_model.deployment.default.get_file(

tokenizer_config_file = zoo_model.deployment.default.get_file(
_MODEL_DIR_TOKENIZER_CONFIG_NAME
)
if tokenizer_config_path is not None:
tokenizer_config_path.path # trigger download of tokenizer_config

if tokenizer_config_file is not None:
tokenizer_config_path = _get_file_parent(
tokenizer_config_file.path
) # trigger download of tokenizer_config

if tokenizer_file is not None:
tokenizer_path = _get_file_parent(tokenizer_file.path)
else:
# if tokenizer_file is not present, we assume it's the OPT model
# this means that we use tokenizer_config_path instead of tokenizer_path
# and need to download the additional tokenizer files
tokenizer_path = tokenizer_config_path
for file in _OPT_TOKENIZER_FILES:
zoo_model.deployment.default.get_file(file).path

else:
raise ValueError(
f"model_path {model_path} is not a valid directory or zoo stub"
Expand Down
84 changes: 84 additions & 0 deletions tests/deepsparse/transformers/pipelines/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Tuple

import numpy
from transformers import AutoModelForCausalLM, AutoTokenizer


class TorchGroundTruthSource:
    """
    An object that generates ground truth logits and
    cache states from a prompt, using a HuggingFace causal
    language model. The model is run in an autoregressive
    manner, and calling the object returns, in this order:
        - generated logits,
        - prompt logits,
        - prompt cache state,
        - generated sequence (decoded text)

    :param num_tokens_to_generate: value forwarded to `generate`
        as `max_new_tokens`
    :param model_name: identifier passed to `from_pretrained` to
        load both the model and its tokenizer
    """

    def __init__(self, num_tokens_to_generate: int, model_name: str):
        self.model = AutoModelForCausalLM.from_pretrained(model_name)
        self.tokenizer = self._create_tokenizer(model_name)

        self.num_tokens_to_generate = num_tokens_to_generate
        self.model_name = model_name

    def tokenize(self, prompt: str):
        """
        :param prompt: the text to tokenize
        :return: the tokenizer output for the prompt, requested
            as PyTorch tensors (`return_tensors="pt"`)
        """
        return self.tokenizer(prompt, return_tensors="pt")

    def __call__(
        self, prompt: str
    ) -> Tuple[numpy.ndarray, numpy.ndarray, List[numpy.ndarray], str]:
        """
        :param prompt: the text to condition the generation on
        :return: a tuple of
            - generated logits, one entry per generated token,
            - prompt logits, with the last position dropped,
            - prompt cache state, a flat list holding every entry
              of every per-layer tuple in `past_key_values`,
            - the decoded generated sequence
        """
        # tokenize once and reuse the encoding for both model runs
        inputs = self.tokenize(prompt)

        # afaik it is not possible to get 'past_key_values' from
        # the generate method, so we have to run the model twice
        out = self.model.generate(
            inputs.input_ids,
            # pass the mask explicitly: _create_tokenizer may set
            # pad_token == eos_token, in which case `generate` cannot
            # infer the mask from the input ids on its own
            attention_mask=inputs.get("attention_mask"),
            max_new_tokens=self.num_tokens_to_generate,
            output_scores=True,
            return_dict_in_generate=True,
            use_cache=True,
        )
        generated_text = self.tokenizer.decode(
            out.sequences[0], skip_special_tokens=True
        )
        # each score is (1, vocab_size); stacking along axis 1 gives
        # (1, num_generated_tokens, vocab_size)
        generated_logits = numpy.stack(
            [score.numpy() for score in out.scores], axis=1
        )

        out = self.model(**inputs)
        # the last position is sliced off, so the result is
        # (1, prompt_length - 1, vocab_size)
        prompt_logits = out.logits.detach().numpy()[:, :-1, :]
        prompt_cache = [
            entry.detach().numpy()
            for key_value_tuple in out.past_key_values
            for entry in key_value_tuple
        ]  # List[(1, num_heads, past_length, head_dim)]

        return generated_logits, prompt_logits, prompt_cache, generated_text

    @staticmethod
    def _create_tokenizer(model_name):
        """
        :param model_name: identifier passed to
            `AutoTokenizer.from_pretrained`
        :return: a left-padding tokenizer; if it defines no pad
            token, the eos token is used as the pad token
        """
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        tokenizer.padding_side = "left"
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        return tokenizer
Loading

0 comments on commit 907ea83

Please sign in to comment.