neuralmagic · rahul-tuli · Feb 13, 2024 · Jan 24, 2024 · Feb 13, 2024
diff --git a/setup.py b/setup.py
@@ -167,8 +167,7 @@ def _parse_requirements_file(file_path):
 _haystack_integration_deps = _parse_requirements_file(_haystack_requirements_file_path)
 _clip_deps = [
     "open_clip_torch==2.20.0",
-    "scipy<1.10,>=1.8",
-    "transformers<4.35",
+    "transformers<4.37",
 ]
 
 

diff --git a/src/deepsparse/clip/zeroshot_pipeline.py b/src/deepsparse/clip/zeroshot_pipeline.py
@@ -20,7 +20,7 @@
 
 from deepsparse.clip import CLIPTextInput, CLIPVisualInput
 from deepsparse.legacy.pipeline import BasePipeline, Pipeline
-from scipy.special import softmax
+from deepsparse.utils import numpy_softmax
 
 
 __all__ = ["CLIPZeroShotInput", "CLIPZeroShotOutput", "CLIPZeroShotPipeline"]
@@ -103,7 +103,7 @@ def __call__(self, *args, **kwargs):
         text_output /= lingalg.norm(text_output, axis=-1, keepdims=True)
 
         output_product = 100.0 * visual_output @ text_output.T
-        text_probs = softmax(output_product, axis=-1)
+        text_probs = numpy_softmax(output_product, axis=-1)
 
         return self.output_schema(text_scores=np.vsplit(text_probs, len(text_probs)))
 

diff --git a/src/deepsparse/server/openai_server.py b/src/deepsparse/server/openai_server.py
@@ -46,10 +46,9 @@
 )
 from deepsparse.server.server import Server
 from deepsparse.tasks import SupportedTasks
-from deepsparse.utils import InferenceState
+from deepsparse.utils import InferenceState, numpy_softmax
 from fastapi import BackgroundTasks, FastAPI, Request
 from fastapi.responses import StreamingResponse
-from scipy.special import softmax
 
 
 _LOGGER = logging.getLogger(__name__)
@@ -493,7 +492,7 @@ def create_logprobs(
     tokens = pipeline.tokenizer.batch_decode(token_ids)
 
     for i in range(len(tokens)):
-        log_prob = float(numpy.log(max(softmax(scores[i]))))
+        log_prob = float(numpy.log(max(numpy_softmax(scores[i]))))
         logprobs.tokens.append(tokens[i])
         logprobs.token_logprobs.append(log_prob)
 

diff --git a/src/deepsparse/transformers/metrics.py b/src/deepsparse/transformers/metrics.py
@@ -21,7 +21,6 @@
 import numpy
 
 from deepsparse.utils.data import numpy_log_softmax
-from scipy.special import log_softmax
 from sklearn.metrics import precision_recall_fscore_support
 
 
@@ -214,7 +213,7 @@ def _cross_entropy(
         float: The computed cross-entropy loss.
     """
 
-    logp = log_softmax(predictions, axis=-1)
+    logp = numpy_log_softmax(predictions, axis=-1)
     neg_log_likelihoods = -1.0 * numpy.take_along_axis(
         logp, numpy.expand_dims(targets, axis=-1), axis=-1
     )

diff --git a/src/deepsparse/utils/data.py b/src/deepsparse/utils/data.py
@@ -170,6 +170,36 @@ def numpy_softmax(x: numpy.ndarray, axis: int = 0):
     return softmax_x
 
 
+def numpy_log_softmax(x: numpy.ndarray, axis: int = 0):
+    """
+    Numerically stable log-softmax, ported from scipy.special.log_softmax.
+    Ref: https://github.com/scipy/scipy/blob/v1.12.0/scipy/special/_logsumexp.py
+
+    In principle: log_softmax(x) = log(softmax(x)), but evaluated as
+    (x - max) - log(sum(exp(x - max))), which is more accurate.
+
+    :param x: array containing the values to take the log-softmax of
+    :param axis: axis across which to perform the log-softmax
+    :return: array with the same shape as x holding the log-softmax values
+    """
+    peak = numpy.max(x, axis=axis, keepdims=True)
+
+    # a non-finite max (inf / nan) is replaced by 0 so the shift is a no-op
+    if peak.ndim == 0:
+        peak = peak if numpy.isfinite(peak) else 0
+    else:
+        peak[~numpy.isfinite(peak)] = 0
+
+    shifted = x - peak
+    exps = numpy.exp(shifted)
+
+    # suppress warnings about log of zero
+    with numpy.errstate(divide="ignore"):
+        log_total = numpy.log(numpy.sum(exps, axis=axis, keepdims=True))
+
+    return shifted - log_total
+
+
 def split_engine_inputs(
     items: List[numpy.ndarray], batch_size: int
 ) -> Tuple[List[List[numpy.ndarray]], int]:

diff --git a/tests/server/test_openai.py b/tests/server/test_openai.py
@@ -24,8 +24,8 @@
     ModelPermission,
     OpenAIServer,
 )
+from deepsparse.utils import numpy_softmax
 from fastapi.testclient import TestClient
-from scipy.special import softmax
 
 
 TEST_MODEL_ID = "hf:mgoin/TinyStories-1M-ds"
@@ -246,7 +246,7 @@ def test_logprobs(client, model_card):
 
     for local_gen, server_gen in zip(output.generations, response.json()["choices"]):
         local_top1_logprobs = [
-            numpy.log(max(softmax(logits))) for logits in local_gen.score
+            numpy.log(max(numpy_softmax(logits))) for logits in local_gen.score
         ]
         server_top1_logprobs = server_gen["logprobs"]["token_logprobs"]
         assert numpy.allclose(local_top1_logprobs, server_top1_logprobs)