diff --git a/setup.py b/setup.py index d37bb1eb42..eb5d835984 100644 --- a/setup.py +++ b/setup.py @@ -167,8 +167,7 @@ def _parse_requirements_file(file_path): _haystack_integration_deps = _parse_requirements_file(_haystack_requirements_file_path) _clip_deps = [ "open_clip_torch==2.20.0", - "scipy<1.10,>=1.8", - "transformers<4.35", + "transformers<4.37", ] diff --git a/src/deepsparse/clip/zeroshot_pipeline.py b/src/deepsparse/clip/zeroshot_pipeline.py index fec1c7de54..fd7e59d90b 100644 --- a/src/deepsparse/clip/zeroshot_pipeline.py +++ b/src/deepsparse/clip/zeroshot_pipeline.py @@ -20,7 +20,7 @@ from deepsparse.clip import CLIPTextInput, CLIPVisualInput from deepsparse.legacy.pipeline import BasePipeline, Pipeline -from scipy.special import softmax +from deepsparse.utils import numpy_softmax __all__ = ["CLIPZeroShotInput", "CLIPZeroShotOutput", "CLIPZeroShotPipeline"] @@ -103,7 +103,7 @@ def __call__(self, *args, **kwargs): text_output /= lingalg.norm(text_output, axis=-1, keepdims=True) output_product = 100.0 * visual_output @ text_output.T - text_probs = softmax(output_product, axis=-1) + text_probs = numpy_softmax(output_product, axis=-1) return self.output_schema(text_scores=np.vsplit(text_probs, len(text_probs))) diff --git a/src/deepsparse/server/openai_server.py b/src/deepsparse/server/openai_server.py index 945ca3d77c..c656323594 100644 --- a/src/deepsparse/server/openai_server.py +++ b/src/deepsparse/server/openai_server.py @@ -46,10 +46,9 @@ ) from deepsparse.server.server import Server from deepsparse.tasks import SupportedTasks -from deepsparse.utils import InferenceState +from deepsparse.utils import InferenceState, numpy_softmax from fastapi import BackgroundTasks, FastAPI, Request from fastapi.responses import StreamingResponse -from scipy.special import softmax _LOGGER = logging.getLogger(__name__) @@ -493,7 +492,7 @@ def create_logprobs( tokens = pipeline.tokenizer.batch_decode(token_ids) for i in range(len(tokens)): - log_prob = 
float(numpy.log(max(softmax(scores[i])))) + log_prob = float(numpy.log(max(numpy_softmax(scores[i])))) logprobs.tokens.append(tokens[i]) logprobs.token_logprobs.append(log_prob) diff --git a/src/deepsparse/transformers/metrics.py b/src/deepsparse/transformers/metrics.py index acfe2e846b..786bde0135 100644 --- a/src/deepsparse/transformers/metrics.py +++ b/src/deepsparse/transformers/metrics.py @@ -21,7 +21,6 @@ import numpy from deepsparse.utils.data import numpy_log_softmax -from scipy.special import log_softmax from sklearn.metrics import precision_recall_fscore_support @@ -214,7 +213,7 @@ def _cross_entropy( float: The computed cross-entropy loss. """ - logp = log_softmax(predictions, axis=-1) + logp = numpy_log_softmax(predictions, axis=-1) neg_log_likelihoods = -1.0 * numpy.take_along_axis( logp, numpy.expand_dims(targets, axis=-1), axis=-1 ) diff --git a/src/deepsparse/utils/data.py b/src/deepsparse/utils/data.py index baaf50b551..72c1056074 100644 --- a/src/deepsparse/utils/data.py +++ b/src/deepsparse/utils/data.py @@ -170,6 +170,36 @@ def numpy_softmax(x: numpy.ndarray, axis: int = 0): return softmax_x +def numpy_log_softmax(x: numpy.ndarray, axis: int = 0): + """ + Ref: https://github.com/scipy/scipy/blob/v1.12.0/scipy/special/_logsumexp.py + + In principle: log_softmax(x) = log(softmax(x)) + but using a more accurate implementation. 
+ + :param x: array containing values to be log-softmaxed + :param axis: axis across which to perform log-softmax + :return: log-softmax of x computed across axis + """ + x_max = numpy.max(x, axis=axis, keepdims=True) + + if x_max.ndim > 0: + x_max[~numpy.isfinite(x_max)] = 0 + elif not numpy.isfinite(x_max): + x_max = 0 + + tmp = x - x_max + exp_tmp = numpy.exp(tmp) + + # suppress warnings about log of zero + with numpy.errstate(divide="ignore"): + s = numpy.sum(exp_tmp, axis=axis, keepdims=True) + out = numpy.log(s) + + out = tmp - out + return out + + def split_engine_inputs( items: List[numpy.ndarray], batch_size: int ) -> Tuple[List[List[numpy.ndarray]], int]: diff --git a/tests/server/test_openai.py b/tests/server/test_openai.py index 54fb74f5e2..a495a4212a 100644 --- a/tests/server/test_openai.py +++ b/tests/server/test_openai.py @@ -24,8 +24,8 @@ ModelPermission, OpenAIServer, ) +from deepsparse.utils import numpy_softmax from fastapi.testclient import TestClient -from scipy.special import softmax TEST_MODEL_ID = "hf:mgoin/TinyStories-1M-ds" @@ -246,7 +246,7 @@ def test_logprobs(client, model_card): for local_gen, server_gen in zip(output.generations, response.json()["choices"]): local_top1_logprobs = [ - numpy.log(max(softmax(logits))) for logits in local_gen.score + numpy.log(max(numpy_softmax(logits))) for logits in local_gen.score ] server_top1_logprobs = server_gen["logprobs"]["token_logprobs"] assert numpy.allclose(local_top1_logprobs, server_top1_logprobs)