Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing tests for Perceiver #14739

Merged
merged 11 commits into from
Dec 14, 2021
10 changes: 10 additions & 0 deletions src/transformers/models/perceiver/modeling_perceiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -1268,6 +1268,7 @@ def forward(
output_hidden_states=None,
labels=None,
return_dict=None,
pixel_values=None,
):
r"""
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
Expand Down Expand Up @@ -1296,6 +1297,10 @@ def forward(
>>> predicted_class_idx = logits.argmax(-1).item()
>>> print("Predicted class:", model.config.id2label[predicted_class_idx])
"""
if inputs is not None and pixel_values is not None:
raise ValueError("You cannot use both `inputs` and `pixel_values`")
elif inputs is None and pixel_values is not None:
inputs = pixel_values
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

outputs = self.perceiver(
Expand Down Expand Up @@ -1399,6 +1404,7 @@ def forward(
output_hidden_states=None,
labels=None,
return_dict=None,
pixel_values=None,
):
r"""
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
Expand Down Expand Up @@ -1427,6 +1433,10 @@ def forward(
>>> predicted_class_idx = logits.argmax(-1).item()
>>> print("Predicted class:", model.config.id2label[predicted_class_idx])
"""
if inputs is not None and pixel_values is not None:
raise ValueError("You cannot use both `inputs` and `pixel_values`")
elif inputs is None and pixel_values is not None:
inputs = pixel_values
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

outputs = self.perceiver(
Expand Down
4 changes: 2 additions & 2 deletions src/transformers/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,8 +528,8 @@ def pipeline(
load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None
load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None

if task in {"audio-classification"}:
# Audio classification will never require a tokenizer.
if task in {"audio-classification", "image-classification"}:
# These will never require a tokenizer.
# the model on the other hand might have a tokenizer, but
# the files could be missing from the hub, instead of failing
# on such repos, we just force to not load it.
Expand Down
28 changes: 23 additions & 5 deletions tests/test_pipelines_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,15 @@ def get_tiny_config_from_class(configuration_class):
model_tester = model_tester_class(parent=None)

if hasattr(model_tester, "get_pipeline_config"):
return model_tester.get_pipeline_config()
config = model_tester.get_pipeline_config()
elif hasattr(model_tester, "get_config"):
return model_tester.get_config()
config = model_tester.get_config()
else:
config = None
logger.warning(f"Model tester {model_tester_class.__name__} has no `get_config()`.")

return config


@lru_cache(maxsize=100)
def get_tiny_tokenizer_from_checkpoint(checkpoint):
Expand All @@ -100,11 +103,17 @@ def get_tiny_tokenizer_from_checkpoint(checkpoint):
return tokenizer


def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config):
def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_extractor_class):
try:
feature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint)
except Exception:
feature_extractor = None
try:
if feature_extractor_class is not None:
feature_extractor = feature_extractor_class()
else:
feature_extractor = None
except Exception:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be more specific than just `Exception`?

Copy link
Contributor Author

@Narsil Narsil Dec 13, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, I have no idea what could crash on arbitrary initialization.

If we want to keep the magic, being very tolerant here seems OK to me.
The code could fail because feature_extractor_class requires some argument to be defined, but also any exception that could be raised during __init__ so I don't see a way to be exhaustive here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason this feature_extractor_class argument is added?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because we need to initialize the feature extractor.

It does not work with from_pretrained above because the checkpoint == "deepmind/language-perceiver" (it retrieves from _CHECKPOINT_FOR_DOC).

But we do have the class, and feature_extractors usually don't carry a lot of information (like vocab.txt with tokenizers, I mean), so trying to instantiate with default arguments makes sense to me. Otherwise we would need another indirection to make from_pretrained work here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok I see, makes sense!

feature_extractor = None
if hasattr(tiny_config, "image_size") and feature_extractor:
feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)

Expand Down Expand Up @@ -168,7 +177,9 @@ def test(self):
self.skipTest(f"Ignoring {ModelClass}, cannot create a simple tokenizer")
else:
tokenizer = None
feature_extractor = get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config)
feature_extractor = get_tiny_feature_extractor_from_checkpoint(
checkpoint, tiny_config, feature_extractor_class
)

if tokenizer is None and feature_extractor is None:
self.skipTest(
Expand Down Expand Up @@ -218,6 +229,13 @@ def data(n):
if not tokenizer_classes:
# We need to test even if there are no tokenizers.
tokenizer_classes = [None]
else:
# Remove the non defined tokenizers
# ByT5 and Perceiver are bytes-level and don't define
# FastTokenizer, we can just ignore those.
tokenizer_classes = [
tokenizer_class for tokenizer_class in tokenizer_classes if tokenizer_class is not None
]
Comment on lines +232 to +238
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a third bytes-level tokenizer, which is CANINE. Should this be added here too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a comment, I don't mind if it's not exhaustive. I think it clarifies enough the purpose of this line. Fine adding it though.


for tokenizer_class in tokenizer_classes:
if tokenizer_class is not None:
Expand Down
60 changes: 48 additions & 12 deletions tests/test_pipelines_image_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,7 @@

import unittest

from transformers import (
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
PerceiverConfig,
PreTrainedTokenizer,
is_vision_available,
)
from transformers import MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING, PreTrainedTokenizer, is_vision_available
from transformers.pipelines import ImageClassificationPipeline, pipeline
from transformers.testing_utils import (
is_pipeline_test,
Expand All @@ -28,6 +23,7 @@
require_tf,
require_torch,
require_vision,
slow,
)

from .test_pipelines_common import ANY, PipelineTestCaseMeta
Expand All @@ -50,12 +46,7 @@ class ImageClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING

def get_test_pipeline(self, model, tokenizer, feature_extractor):
if isinstance(model.config, PerceiverConfig):
self.skipTest(
"Perceiver model tester is defined with a language one, which has no feature_extractor, so the automated test cannot work here"
)

image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor)
image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor, top_k=2)
examples = [
Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
"http://images.cocodataset.org/val2017/000000039769.jpg",
Expand Down Expand Up @@ -167,3 +158,48 @@ def test_custom_tokenizer(self):
image_classifier = pipeline("image-classification", model="lysandre/tiny-vit-random", tokenizer=tokenizer)

self.assertIs(image_classifier.tokenizer, tokenizer)

@slow
@require_torch
def test_perceiver(self):
    """Smoke-test the three Perceiver vision checkpoints end to end.

    Perceiver is exercised here explicitly because `run_pipeline_test`
    cannot cover it properly: the feature extractor and the model's
    preprocessor must be kept in sync, which the current generic test
    design does not guarantee.
    """
    # Expected top predictions for the COCO cats image, per checkpoint,
    # as produced by `nested_simplify(..., decimals=4)`.
    checkpoint_expectations = [
        (
            "deepmind/vision-perceiver-conv",
            [
                {"score": 0.4385, "label": "tabby, tabby cat"},
                {"score": 0.321, "label": "tiger cat"},
                {"score": 0.0502, "label": "Egyptian cat"},
                {"score": 0.0137, "label": "crib, cot"},
                {"score": 0.007, "label": "radiator"},
            ],
        ),
        (
            "deepmind/vision-perceiver-fourier",
            [
                {"score": 0.5658, "label": "tabby, tabby cat"},
                {"score": 0.1309, "label": "tiger cat"},
                {"score": 0.0722, "label": "Egyptian cat"},
                {"score": 0.0707, "label": "remote control, remote"},
                {"score": 0.0082, "label": "computer keyboard, keypad"},
            ],
        ),
        (
            "deepmind/vision-perceiver-learned",
            [
                {"score": 0.3022, "label": "tabby, tabby cat"},
                {"score": 0.2362, "label": "Egyptian cat"},
                {"score": 0.1856, "label": "tiger cat"},
                {"score": 0.0324, "label": "remote control, remote"},
                {"score": 0.0096, "label": "quilt, comforter, comfort, puff"},
            ],
        ),
    ]
    image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    for checkpoint, expected in checkpoint_expectations:
        image_classifier = pipeline("image-classification", model=checkpoint)
        outputs = image_classifier(image_url)
        self.assertEqual(nested_simplify(outputs, decimals=4), expected)