Commit

Avoid unnecessary multi-modal input data copy when len(batch) == 1 (#12722)

Signed-off-by: imkero <[email protected]>
imkero authored Feb 4, 2025
1 parent 6469038 commit 62467a8
Showing 1 changed file with 16 additions and 0 deletions.
vllm/multimodal/inputs.py (16 additions, 0 deletions)
@@ -212,6 +212,11 @@ def build_elems(self, batch: NestedTensors) -> list[MultiModalFieldElem]:
 
     def _reduce_data(self, batch: list[NestedTensors]) -> NestedTensors:
         if len(batch) > 0 and is_list_of(batch, torch.Tensor, check="all"):
+            if len(batch) == 1:
+                # An optimization when `batch` contains only one tensor:
+                # - produce exactly the same result as `torch.stack(batch)`
+                # - will achieve zero-copy if the tensor is contiguous
+                return batch[0].unsqueeze(0).contiguous()
             first_shape = batch[0].shape
             if all(elem.shape == first_shape for elem in batch):
                 return torch.stack(batch)
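
(Illustration only, not part of the diff.) A minimal sketch of why this fast path is safe: for a one-element batch, unsqueeze(0) on an already-contiguous tensor is just a view and the trailing .contiguous() is a no-op, whereas torch.stack always writes into freshly allocated storage. The tensor values below are made up for demonstration:

import torch

t = torch.arange(6, dtype=torch.float32).reshape(2, 3)  # contiguous input tensor
batch = [t]                                              # single-element batch

fast = batch[0].unsqueeze(0).contiguous()  # view + no-op contiguous(): no data copy
slow = torch.stack(batch)                  # allocates a new (1, 2, 3) tensor

assert torch.equal(fast, slow)             # same values and shape either way
assert fast.data_ptr() == t.data_ptr()     # fast path still aliases t's storage
print(slow.data_ptr() == t.data_ptr())     # expected False: stack copied the data
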
@@ -235,6 +240,11 @@ def build_elems(
 
     def _reduce_data(self, batch: list[NestedTensors]) -> NestedTensors:
         if len(batch) > 0 and is_list_of(batch, torch.Tensor, check="all"):
+            if len(batch) == 1:
+                # An optimization when `batch` contains only one tensor:
+                # - produce exactly the same result as `torch.concat(batch)`
+                # - will achieve zero-copy if the tensor is contiguous
+                return batch[0].contiguous()
             first_shape = batch[0].shape
             if all(elem.shape[1:] == first_shape[1:] for elem in batch):
                 return torch.concat(batch)
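
(Illustration only, not part of the diff.) The concat case works the same way: .contiguous() on an already-contiguous tensor hands back the input itself, while torch.concat of even a single tensor materializes a new one, which is exactly the copy this change skips. Example values are made up:

import torch

t = torch.arange(12, dtype=torch.float32).reshape(4, 3)  # contiguous input tensor
batch = [t]

fast = batch[0].contiguous()   # no-op: returns t itself, zero-copy
slow = torch.concat(batch)     # concatenates a single tensor along dim 0

assert torch.equal(fast, slow)           # identical values and shape (4, 3)
assert fast is t                         # contiguous() returned the same object
print(slow.data_ptr() == t.data_ptr())   # expected False: concat copied the data
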
@@ -407,6 +417,12 @@ def _try_stack(nested_tensors: NestedTensors) -> NestedTensors:
             return stacked
 
         tensors_ = cast(list[torch.Tensor], stacked)
+        if len(tensors_) == 1:
+            # An optimization when `tensors_` contains only one tensor:
+            # - produce exactly the same result as `torch.stack(tensors_)`
+            # - will achieve zero-copy if the tensor is contiguous
+            return tensors_[0].unsqueeze(0).contiguous()
+
         if any(t.shape != tensors_[0].shape for t in tensors_):
             # The tensors have incompatible shapes and can't be stacked.
             return tensors_
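
(Illustration only, not part of the diff.) A last sketch showing why the trailing .contiguous() is kept even on the fast path: if the single tensor happens to be a non-contiguous view, the result still matches torch.stack's contiguous output; only the zero-copy benefit is lost in that case.

import torch

t = torch.arange(6, dtype=torch.float32).reshape(2, 3).t()  # transpose -> non-contiguous view
single = [t]

fast = single[0].unsqueeze(0).contiguous()  # contiguous() must copy here
slow = torch.stack(single)                  # also yields a contiguous (1, 3, 2) tensor

assert not t.is_contiguous()
assert torch.equal(fast, slow)                        # results still match
assert fast.is_contiguous() and slow.is_contiguous()  # same layout guarantee as stack
assert fast.data_ptr() != t.data_ptr()                # zero-copy is not possible here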