Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert falcon exception #26472

Merged
merged 2 commits
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 1 addition & 9 deletions src/transformers/models/auto/auto_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,7 @@
logging,
requires_backends,
)
from .configuration_auto import (
AutoConfig,
model_type_to_module_name,
replace_list_option_in_docstrings,
sanitize_code_revision,
)
from .configuration_auto import AutoConfig, model_type_to_module_name, replace_list_option_in_docstrings


logger = logging.get_logger(__name__)
Expand Down Expand Up @@ -471,9 +466,6 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
commit_hash = kwargs.pop("_commit_hash", None)
adapter_kwargs = kwargs.pop("adapter_kwargs", None)

revision = hub_kwargs.pop("revision", None)
hub_kwargs["revision"] = sanitize_code_revision(pretrained_model_name_or_path, revision, trust_remote_code)

token = hub_kwargs.pop("token", None)
use_auth_token = hub_kwargs.pop("use_auth_token", None)
if use_auth_token is not None:
Expand Down
24 changes: 0 additions & 24 deletions src/transformers/models/auto/configuration_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -1031,9 +1031,6 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
trust_remote_code = kwargs.pop("trust_remote_code", None)
code_revision = kwargs.pop("code_revision", None)

revision = kwargs.pop("revision", None)
kwargs["revision"] = sanitize_code_revision(pretrained_model_name_or_path, revision, trust_remote_code)

config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
has_remote_code = "auto_map" in config_dict and "AutoConfig" in config_dict["auto_map"]
has_local_code = "model_type" in config_dict and config_dict["model_type"] in CONFIG_MAPPING
Expand Down Expand Up @@ -1081,24 +1078,3 @@ def register(model_type, config, exist_ok=False):
"match!"
)
CONFIG_MAPPING.register(model_type, config, exist_ok=exist_ok)


def sanitize_code_revision(pretrained_model_name_or_path, revision, trust_remote_code):
    """Pin known Falcon Hub checkpoints to a safe revision unless remote code is trusted.

    When the caller did not request a specific revision (``revision`` is ``"main"`` or
    ``None``) and ``trust_remote_code`` is falsy, the four official ``tiiuae`` Falcon
    checkpoints are redirected to a commit hash whose config/weights do not rely on
    remote code, and a warning is logged. Any other input is returned unchanged.
    """
    # An explicitly requested revision, or trusted remote code, always wins.
    if trust_remote_code or revision not in ("main", None):
        return revision

    pinned_revisions = {
        "tiiuae/falcon-7b": "4e2d06f0a7c6370ebabbc30c6f59377ae8f73d76",
        "tiiuae/falcon-7b-instruct": "f8dac3fff96d5debd43edf56fb4e1abcfffbef28",
        "tiiuae/falcon-40b": "f1ba7d328c06aa6fbb4a8afd3c756f46d7e6b232",
        "tiiuae/falcon-40b-instruct": "7475ff8cfc36ed9a962b658ae3c33391566a85a5",
    }

    if isinstance(pretrained_model_name_or_path, str):
        pinned = pinned_revisions.get(pretrained_model_name_or_path.lower())
        if pinned is not None:
            revision = pinned
            logger.warning(
                "The Falcon model was initialized without `trust_remote_code=True`, and will therefore leverage the "
                f"transformers library implementation. {pretrained_model_name_or_path}'s revision is set to a version that doesn't "
                f"leverage remote code ({revision}).\n\nIn order to override this, please set a revision manually or set "
                "`trust_remote_code=True`."
            )

    return revision
27 changes: 0 additions & 27 deletions src/transformers/models/falcon/configuration_falcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Falcon configuration"""
import os
from typing import Optional, Union

from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ..auto.configuration_auto import sanitize_code_revision


logger = logging.get_logger(__name__)
Expand Down Expand Up @@ -193,26 +189,3 @@ def _rope_scaling_validation(self):
)
if rope_scaling_factor is None or not isinstance(rope_scaling_factor, float) or rope_scaling_factor <= 1.0:
raise ValueError(f"`rope_scaling`'s factor field must be an float > 1, got {rope_scaling_factor}")

@classmethod
def from_pretrained(
    cls,
    pretrained_model_name_or_path: Union[str, os.PathLike],
    cache_dir: Optional[Union[str, os.PathLike]] = None,
    force_download: bool = False,
    local_files_only: bool = False,
    token: Optional[Union[str, bool]] = None,
    revision: str = "main",
    **kwargs,
) -> "PretrainedConfig":
    """Load a ``FalconConfig``, pinning the Hub revision for official Falcon checkpoints.

    Overrides the parent ``from_pretrained`` only to rewrite ``revision`` through
    ``sanitize_code_revision`` before delegating: when remote code is not trusted,
    the official ``tiiuae`` Falcon checkpoints are redirected to a commit that does
    not rely on remote code. All other arguments are forwarded unchanged.
    """
    # kwargs.get (not pop): trust_remote_code is deliberately left in kwargs so the
    # parent implementation can also consume it.
    revision = sanitize_code_revision(pretrained_model_name_or_path, revision, kwargs.get("trust_remote_code"))

    return super().from_pretrained(
        pretrained_model_name_or_path,
        cache_dir=cache_dir,
        force_download=force_download,
        local_files_only=local_files_only,
        token=token,
        revision=revision,
        **kwargs,
    )
33 changes: 0 additions & 33 deletions src/transformers/models/falcon/modeling_falcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
"""PyTorch Falcon model."""

import math
import os
from typing import Optional, Tuple, Union

import torch
Expand All @@ -39,7 +38,6 @@
is_flash_attn_available,
logging,
)
from ..auto.configuration_auto import sanitize_code_revision
from .configuration_falcon import FalconConfig


Expand Down Expand Up @@ -977,37 +975,6 @@ def _convert_to_rw_cache(
for layer_past in past_key_value
)

@classmethod
def from_pretrained(
    cls,
    pretrained_model_name_or_path: Optional[Union[str, os.PathLike]],
    *model_args,
    config: Optional[Union[str, os.PathLike]] = None,
    cache_dir: Optional[Union[str, os.PathLike]] = None,
    ignore_mismatched_sizes: bool = False,
    force_download: bool = False,
    local_files_only: bool = False,
    token: Optional[Union[str, bool]] = None,
    revision: str = "main",
    use_safetensors: Optional[bool] = None,
    **kwargs,
):
    """Load a Falcon model, pinning the Hub revision for official Falcon checkpoints.

    Overrides the parent ``from_pretrained`` only to rewrite ``revision`` through
    ``sanitize_code_revision`` before delegating: when remote code is not trusted,
    the official ``tiiuae`` Falcon checkpoints are redirected to a commit that does
    not rely on remote code. All other arguments are forwarded unchanged.
    """
    # kwargs.get (not pop): trust_remote_code is deliberately left in kwargs so the
    # parent implementation can also consume it.
    revision = sanitize_code_revision(pretrained_model_name_or_path, revision, kwargs.get("trust_remote_code"))

    return super().from_pretrained(
        pretrained_model_name_or_path,
        *model_args,
        config=config,
        cache_dir=cache_dir,
        ignore_mismatched_sizes=ignore_mismatched_sizes,
        force_download=force_download,
        local_files_only=local_files_only,
        token=token,
        revision=revision,
        use_safetensors=use_safetensors,
        **kwargs,
    )


@add_start_docstrings(
"The bare Falcon Model transformer outputting raw hidden-states without any specific head on top.",
Expand Down
134 changes: 1 addition & 133 deletions tests/models/falcon/test_modeling_falcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,13 @@
from parameterized import parameterized

from transformers import (
AutoConfig,
AutoModel,
AutoModelForCausalLM,
AutoTokenizer,
FalconConfig,
is_torch_available,
set_seed,
)
from transformers.testing_utils import CaptureLogger, require_bitsandbytes, require_torch, slow, tooslow, torch_device
from transformers.utils import logging as transformers_logging
from transformers.testing_utils import require_bitsandbytes, require_torch, slow, torch_device

from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
Expand Down Expand Up @@ -538,132 +535,3 @@ def test_batched_generation(self):
self.assertLess(unpadded_inputs.input_ids.shape[-1], padded_inputs.input_ids.shape[-1]) # left-padding exists
self.assertEqual(unpadded_gen_text[0], expected_output)
self.assertEqual(padded_gen_text[0], expected_output)


# TODO Lysandre: Remove this in version v4.34
class FalconOverrideTest(unittest.TestCase):
    """Tests for the temporary Falcon revision-pinning override.

    Verifies that loading the official ``tiiuae`` Falcon checkpoints without
    ``trust_remote_code=True`` logs a warning and pins a revision backed by the
    transformers library implementation, while ``trust_remote_code=True`` (or an
    explicit revision) loads the remote ``RefinedWeb``/``RefinedWebModel`` code.

    NOTE(review): these tests hit the Hugging Face Hub (network required); the
    ``@tooslow`` model tests additionally download full checkpoints.
    """

    # The four official Falcon checkpoints covered by the override.
    supported_checkpoints = [
        "tiiuae/falcon-7b",
        "tiiuae/falcon-7b-instruct",
        "tiiuae/falcon-40b",
        "tiiuae/falcon-40b-instruct",
    ]

    # Pinned Hub commit hashes used to test loading at an explicit revision.
    latest_revisions = {
        "tiiuae/falcon-7b": "f7796529e36b2d49094450fb038cc7c4c86afa44",
        "tiiuae/falcon-7b-instruct": "eb410fb6ffa9028e97adb801f0d6ec46d02f8b07",
        "tiiuae/falcon-40b": "561820f7eef0cc56a31ea38af15ca1acb07fab5d",
        "tiiuae/falcon-40b-instruct": "ca78eac0ed45bf64445ff0687fabba1598daebf3",
    }

    def test_config_without_remote_code(self):
        """FalconConfig warns and pins a revision when remote code is not trusted."""
        logger_ = transformers_logging.get_logger("transformers.models.auto.configuration_auto")

        for supported_checkpoint in self.supported_checkpoints:
            with CaptureLogger(logger_) as cm:
                # Explicit False and the default (None) must behave identically.
                config1 = FalconConfig.from_pretrained(supported_checkpoint, trust_remote_code=False)
                config2 = FalconConfig.from_pretrained(supported_checkpoint)

            self.assertIn(
                "The Falcon model was initialized without `trust_remote_code=True`, and will therefore leverage the "
                "transformers library implementation.",
                cm.out,
            )

            self.assertEqual(config1.to_dict(), config2.to_dict())

    def test_auto_config_without_remote_code(self):
        """AutoConfig warns and pins a revision when remote code is not trusted."""
        logger_ = transformers_logging.get_logger("transformers.models.auto.configuration_auto")

        for supported_checkpoint in self.supported_checkpoints:
            with CaptureLogger(logger_) as cm:
                config1 = AutoConfig.from_pretrained(supported_checkpoint, trust_remote_code=False)
                config2 = AutoConfig.from_pretrained(supported_checkpoint)

            self.assertIn(
                "The Falcon model was initialized without `trust_remote_code=True`, and will therefore leverage the "
                "transformers library implementation.",
                cm.out,
            )

            self.assertEqual(config1.to_dict(), config2.to_dict())

    def test_config_with_remote_code(self):
        """With trust_remote_code=True, the remote (RefinedWeb) config is loaded."""
        for supported_checkpoint in self.supported_checkpoints:
            config = FalconConfig.from_pretrained(supported_checkpoint, trust_remote_code=True)

            self.assertIn(config.model_type, ["RefinedWebModel", "RefinedWeb"])

    def test_auto_config_with_remote_code(self):
        """AutoConfig with trust_remote_code=True loads the remote (RefinedWeb) config."""
        for supported_checkpoint in self.supported_checkpoints:
            config = AutoConfig.from_pretrained(supported_checkpoint, trust_remote_code=True)

            self.assertIn(config.model_type, ["RefinedWebModel", "RefinedWeb"])

    def test_config_with_specific_revision(self):
        """An explicitly requested revision is honored (no pinning applied)."""
        for supported_checkpoint in self.supported_checkpoints:
            config = FalconConfig.from_pretrained(
                supported_checkpoint, revision=self.latest_revisions[supported_checkpoint], trust_remote_code=True
            )

            self.assertIn(config.model_type, ["RefinedWebModel", "RefinedWeb"])

    def test_auto_config_with_specific_revision(self):
        """AutoConfig honors an explicitly requested revision (no pinning applied)."""
        for supported_checkpoint in self.supported_checkpoints:
            config = AutoConfig.from_pretrained(
                supported_checkpoint, revision=self.latest_revisions[supported_checkpoint], trust_remote_code=True
            )

            self.assertIn(config.model_type, ["RefinedWebModel", "RefinedWeb"])

    @tooslow
    def test_model_without_remote_code(self):
        """FalconModel warns and pins a revision; trust_remote_code is a no-op for it."""
        # NOTE(review): FalconModel is imported elsewhere in this test module.
        logger_ = transformers_logging.get_logger("transformers.models.auto.configuration_auto")
        for supported_checkpoint in self.supported_checkpoints:
            with CaptureLogger(logger_) as cm:
                config1 = FalconModel.from_pretrained(supported_checkpoint, trust_remote_code=False).config
                config2 = FalconModel.from_pretrained(supported_checkpoint).config

                # trust_remote_code only works with Auto Classes !
                config3 = FalconModel.from_pretrained(supported_checkpoint, trust_remote_code=True).config

            self.assertIn(
                "The Falcon model was initialized without `trust_remote_code=True`, and will therefore leverage the "
                "transformers library implementation.",
                cm.out,
            )

            self.assertEqual(config1.to_dict(), config2.to_dict())
            self.assertEqual(config1.to_dict(), config3.to_dict())

    @tooslow
    def test_auto_model_without_remote_code(self):
        """AutoModel warns and pins a revision when remote code is not trusted."""
        logger_ = transformers_logging.get_logger("transformers.models.auto.configuration_auto")
        for supported_checkpoint in self.supported_checkpoints:
            with CaptureLogger(logger_) as cm:
                config1 = AutoModel.from_pretrained(supported_checkpoint, trust_remote_code=False).config
                config2 = AutoModel.from_pretrained(supported_checkpoint).config

            self.assertIn(
                "The Falcon model was initialized without `trust_remote_code=True`, and will therefore leverage the "
                "transformers library implementation.",
                cm.out,
            )

            self.assertEqual(config1.to_dict(), config2.to_dict())

    @tooslow
    def test_auto_model_with_remote_code(self):
        """AutoModel with trust_remote_code=True loads the remote (RefinedWeb) model."""
        for supported_checkpoint in self.supported_checkpoints:
            config = AutoModel.from_pretrained(supported_checkpoint, trust_remote_code=True).config

            self.assertIn(config.model_type, ["RefinedWebModel", "RefinedWeb"])

    @tooslow
    def test_auto_model_with_specific_revision(self):
        """AutoModel honors an explicitly requested revision (no pinning applied)."""
        for supported_checkpoint in self.supported_checkpoints:
            config = AutoModel.from_pretrained(
                supported_checkpoint, revision=self.latest_revisions[supported_checkpoint], trust_remote_code=True
            ).config

            self.assertIn(config.model_type, ["RefinedWebModel", "RefinedWeb"])