diff --git a/src/transformers/models/llava_next/processing_llava_next.py b/src/transformers/models/llava_next/processing_llava_next.py
index d8034ca9fa56..4de5fe63efce 100644
--- a/src/transformers/models/llava_next/processing_llava_next.py
+++ b/src/transformers/models/llava_next/processing_llava_next.py
@@ -200,11 +200,11 @@ def _get_unpadded_features(self, height, width, patches_height, patches_width, s
         original_aspect_ratio = width / height
         current_aspect_ratio = current_width / current_height
         if original_aspect_ratio > current_aspect_ratio:
-            new_height = (height * current_width) // width
+            new_height = int(round(height * (current_width / width), 7))
             padding = (current_height - new_height) // 2
             current_height -= padding * 2
         else:
-            new_width = (width * current_height) // height
+            new_width = int(round(width * (current_height / height), 7))
             padding = (current_width - new_width) // 2
             current_width -= padding * 2
 
diff --git a/src/transformers/models/llava_next_video/processing_llava_next_video.py b/src/transformers/models/llava_next_video/processing_llava_next_video.py
index 349a2253a233..8a1294611b0d 100644
--- a/src/transformers/models/llava_next_video/processing_llava_next_video.py
+++ b/src/transformers/models/llava_next_video/processing_llava_next_video.py
@@ -253,11 +253,11 @@ def _get_unpadded_features(self, height, width, patches_height, patches_width, s
         original_aspect_ratio = width / height
         current_aspect_ratio = current_width / current_height
         if original_aspect_ratio > current_aspect_ratio:
-            new_height = (height * current_width) // width
+            new_height = int(round(height * (current_width / width), 7))
             padding = (current_height - new_height) // 2
             current_height -= padding * 2
         else:
-            new_width = (width * current_height) // height
+            new_width = int(round(width * (current_height / height), 7))
             padding = (current_width - new_width) // 2
             current_width -= padding * 2
 
diff --git a/src/transformers/models/llava_onevision/processing_llava_onevision.py b/src/transformers/models/llava_onevision/processing_llava_onevision.py
index eded6084e91e..f4ca90f28c21 100644
--- a/src/transformers/models/llava_onevision/processing_llava_onevision.py
+++ b/src/transformers/models/llava_onevision/processing_llava_onevision.py
@@ -225,6 +225,7 @@ def _get_number_of_features(self, orig_height: int, orig_width: int, height: int
         num_image_tokens = unpadded_features + newline_features + base_features
         return num_image_tokens
 
+    # Adapted from transformers.models.llava_next.processing_llava_next.LlavaNextProcessor._get_unpadded_features
     def _get_unpadded_features(self, height, width, patches_height, patches_width, scale_height, scale_width):
         """
         Get number of features for a given image with height/width. LLaVA-NeXT is different from LLaVA
@@ -237,11 +238,11 @@ def _get_unpadded_features(self, height, width, patches_height, patches_width, s
         original_aspect_ratio = width / height
         current_aspect_ratio = current_width / current_height
         if original_aspect_ratio > current_aspect_ratio:
-            new_height = int(height * (current_width / width))
+            new_height = int(round(height * (current_width / width), 7))
             padding = (current_height - new_height) // 2
             current_height -= padding * 2
         else:
-            new_width = int(width * (current_height / height))
+            new_width = int(round(width * (current_height / height), 7))
             padding = (current_width - new_width) // 2
             current_width -= padding * 2
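
Note on the change: the processor has to predict exactly how many image feature tokens the model will emit, and the modeling side (`unpad_image` in `modeling_llava_next.py`) computes the unpadded size in float arithmetic with the same `int(round(..., 7))` pattern. Plain `int()` truncation of a float product can land one below the mathematically exact value; rounding to 7 decimal places first absorbs that float noise before truncating. A minimal sketch of the failure mode (the concrete numbers below are illustrative, not taken from the patch):

    # Exact value of 100 * 29 / 100 is 29, but float math drifts just below it.
    height, ratio_num, ratio_den = 100, 29, 100

    exact = (height * ratio_num) // ratio_den                # 29: pure integer math
    truncated = int(height * (ratio_num / ratio_den))        # 28: 100 * 0.29 == 28.999999999999996
    fixed = int(round(height * (ratio_num / ratio_den), 7))  # 29: matches the exact value

    assert exact == fixed == 29 and truncated == 28

An off-by-one here means the number of image placeholder tokens the processor expands no longer matches the number of features the vision tower produces, which typically surfaces as an image-tokens/image-features mismatch error at inference time.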