[Feature Branch][LLM Testing] Full Testing Harness for LLMs (#1216)

* initial commit * initial commit * [Feature Branch][LLM Testing] Create GroundTruthSource objects (#1219) * initial commit * finish creation of helper objects * Update tests/conftest.py * small refactor * [Feature Branch][LLM Testing] LLM Testing Suite (#1227) * Update README.md * Update src/deepsparse/yolov8/README.md * Update text_generation.py * quality * readability * all tests passing * added some full kv cache tests * initial commit * ready for review * Delete tests/deepsparse/transformers/pipelines/proposal_text_generation_tests.md * fix tests * Dipika's comments plus adjusting the script to renamed variables * remove ORT ground truth * add OPT tests * rebase and disable tests in GHA * quality
neuralmagic · Sep 13, 2023 · 907ea83 · 907ea83
1 parent 1439359
commit 907ea83
Show file tree

Hide file tree

Showing 3 changed files with 519 additions and 229 deletions.
diff --git a/src/deepsparse/transformers/helpers.py b/src/deepsparse/transformers/helpers.py
@@ -49,6 +49,7 @@
 _MODEL_DIR_CONFIG_NAME = "config.json"
 _MODEL_DIR_TOKENIZER_NAME = "tokenizer.json"
 _MODEL_DIR_TOKENIZER_CONFIG_NAME = "tokenizer_config.json"
+_OPT_TOKENIZER_FILES = ["special_tokens_map.json", "vocab.json", "merges.txt"]
 
 
 def get_onnx_path(model_path: str) -> str:
@@ -122,14 +123,29 @@ def get_hugging_face_configs(model_path: str) -> Tuple[str, str]:
         config_path = _get_file_parent(
             zoo_model.deployment.default.get_file(_MODEL_DIR_CONFIG_NAME).path
         )
-        tokenizer_path = _get_file_parent(
-            zoo_model.deployment.default.get_file(_MODEL_DIR_TOKENIZER_NAME).path
+        tokenizer_file = zoo_model.deployment.default.get_file(
+            _MODEL_DIR_TOKENIZER_NAME
         )
-        tokenizer_config_path = zoo_model.deployment.default.get_file(
+
+        tokenizer_config_file = zoo_model.deployment.default.get_file(
             _MODEL_DIR_TOKENIZER_CONFIG_NAME
         )
-        if tokenizer_config_path is not None:
-            tokenizer_config_path.path  # trigger download of tokenizer_config
+
+        if tokenizer_config_file is not None:
+            tokenizer_config_path = _get_file_parent(
+                tokenizer_config_file.path
+            )  # trigger download of tokenizer_config
+
+        if tokenizer_file is not None:
+            tokenizer_path = _get_file_parent(tokenizer_file.path)
+        else:
+            # if tokenizer_file is not present, we assume it's the OPT model
+            # this means that we use tokenizer_config_path instead of tokenizer_path
+            # and need to download the additional tokenizer files
+            tokenizer_path = tokenizer_config_path
+            for file in _OPT_TOKENIZER_FILES:
+                zoo_model.deployment.default.get_file(file).path
+
     else:
         raise ValueError(
             f"model_path {model_path} is not a valid directory or zoo stub"

diff --git a/tests/deepsparse/transformers/pipelines/helpers.py b/tests/deepsparse/transformers/pipelines/helpers.py
@@ -0,0 +1,84 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List, Tuple
+
+import numpy
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+class TorchGroundTruthSource:
+    """
+    Produces ground truth logits and cache states for a prompt by running
+    a causal language model loaded through the transformers library.
+
+    Calling the object with a prompt runs the model twice (once through
+    `generate` and once as a plain forward pass) and returns, in order:
+     - generated logits,
+     - prompt logits,
+     - prompt cache state,
+     - generated sequence
+
+    :param num_tokens_to_generate: forwarded to `generate` as
+        `max_new_tokens`
+    :param model_name: identifier handed to
+        `AutoModelForCausalLM.from_pretrained` and
+        `AutoTokenizer.from_pretrained`
+    """
+
+    def __init__(self, num_tokens_to_generate: int, model_name: str):
+
+        self.model = AutoModelForCausalLM.from_pretrained(model_name)
+        self.tokenizer = self._create_tokenizer(model_name)
+
+        self.num_tokens_to_generate = num_tokens_to_generate
+        self.model_name = model_name
+
+    def tokenize(self, prompt: str):
+        """
+        :param prompt: the text to encode
+        :return: the output of the tokenizer called with
+            `return_tensors="pt"`
+        """
+        return self.tokenizer(prompt, return_tensors="pt")
+
+    def __call__(
+        self, prompt: str
+    ) -> Tuple[numpy.ndarray, numpy.ndarray, List[numpy.ndarray], str]:
+        """
+        :param prompt: the text to run through the model
+        :return: a tuple of
+            - generated_logits: the per-step `scores` from `generate`,
+              stacked along axis 1
+            - prompt_logits: logits of the forward pass over the prompt
+              with the last position removed
+            - prompt_cache: flat list of every entry of every tuple in
+              the forward pass' `past_key_values`
+            - generated_text: `sequences[0]` from `generate`, decoded
+              with `skip_special_tokens=True`
+        """
+        # tokenize once so that both model runs consume identical inputs
+        inputs = self.tokenize(prompt)
+
+        # afaik it is not possible to get 'past_key_values' from
+        # the generate method, so we have to run the model twice
+        out = self.model.generate(
+            inputs.input_ids,
+            # hand over the tokenizer's mask (None if it did not produce
+            # one) so that `generate` and the forward pass below agree
+            attention_mask=inputs.get("attention_mask"),
+            max_new_tokens=self.num_tokens_to_generate,
+            output_scores=True,
+            return_dict_in_generate=True,
+            use_cache=True,
+        )
+        generated_text = self.tokenizer.decode(
+            out.sequences[0], skip_special_tokens=True
+        )
+        # out.scores holds one (1, vocab_size) entry per generated token
+        generated_logits = numpy.stack(
+            [score.numpy() for score in out.scores], axis=1
+        )  # (1, num_tokens_to_generate, vocab_size)
+
+        out = self.model(**inputs)
+        # the logits at the last prompt position are sliced off
+        prompt_logits = out.logits.detach().numpy()[
+            :, :-1, :
+        ]  # (1, prompt_length - 1, vocab_size)
+        prompt_cache = [
+            entry.detach().numpy()
+            for key_value_tuple in out.past_key_values
+            for entry in key_value_tuple
+        ]  # List[(1, num_heads, past_length, head_dim)]
+
+        return generated_logits, prompt_logits, prompt_cache, generated_text
+
+    @staticmethod
+    def _create_tokenizer(model_name):
+        """
+        :param model_name: identifier handed to
+            `AutoTokenizer.from_pretrained`
+        :return: the loaded tokenizer with `padding_side` set to "left";
+            when it defines no pad token, the eos token is used instead
+        """
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        tokenizer.padding_side = "left"
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+
+        return tokenizer