Add Doc Strings to Config Files #465

Open

wants to merge 34 commits into base: main from Document-Custom-Model-Files

34 commits
9334097
Add docstrings and comments.
ParagEkbote Dec 19, 2024
72534f6
Merge branch 'huggingface:main' into Document-Custom-Model-Files
ParagEkbote Dec 20, 2024
f35ad57
Add docstrings for config class.
ParagEkbote Dec 20, 2024
e9d7bb0
Merge branch 'Document-Custom-Model-Files' of https://github.com/Para…
ParagEkbote Dec 20, 2024
291a92c
Merge branch 'main' into Document-Custom-Model-Files
ParagEkbote Dec 22, 2024
4f25938
Merge branch 'main' into Document-Custom-Model-Files
ParagEkbote Dec 26, 2024
bf50f2d
Add proper spacing between comments.
ParagEkbote Dec 30, 2024
1a96a1f
Merge branch 'Document-Custom-Model-Files' of https://github.com/Para…
ParagEkbote Dec 30, 2024
3ca0960
Re-write comments as per review
ParagEkbote Dec 30, 2024
bf65e27
Update spacing in yaml file.
ParagEkbote Dec 30, 2024
9b753af
Merge branch 'Document-Custom-Model-Files' of https://github.com/Para…
ParagEkbote Dec 30, 2024
caae117
Re-write comments as per review-2
ParagEkbote Dec 31, 2024
095f277
Make style.
ParagEkbote Dec 31, 2024
b10f9da
Merge branch 'Document-Custom-Model-Files' of https://github.com/Para…
ParagEkbote Dec 31, 2024
e3f6f13
make style.
ParagEkbote Jan 9, 2025
79a3e35
styling improvements.
ParagEkbote Jan 9, 2025
4f7ee64
Update Docstrings and fix formatting.
ParagEkbote Jan 11, 2025
dda8266
Merge branch 'main' of https://github.com/ParagEkbote/lighteval
ParagEkbote Jan 11, 2025
5d225b3
Update docstrings to files.
ParagEkbote Jan 11, 2025
262b1cd
Merge branch 'main' into Document-Custom-Model-Files
ParagEkbote Jan 17, 2025
4c33274
Merge branch 'main' into Document-Custom-Model-Files
ParagEkbote Jan 21, 2025
5815119
Merge branch 'main' into Document-Custom-Model-Files
clefourrier Jan 23, 2025
73af85b
Merge branch 'main' into Document-Custom-Model-Files
ParagEkbote Jan 23, 2025
ab38bd5
Merge branch 'main' into Document-Custom-Model-Files
ParagEkbote Jan 30, 2025
d809e39
Merge branch 'main' into Document-Custom-Model-Files
ParagEkbote Feb 6, 2025
7b1e7e3
Merge branch 'main' into Document-Custom-Model-Files
ParagEkbote Feb 10, 2025
53ce579
Merge branch 'main' into Document-Custom-Model-Files
ParagEkbote Feb 13, 2025
927907c
Merge branch 'main' into Document-Custom-Model-Files
ParagEkbote Feb 18, 2025
9c2f465
Merge branch 'main' into Document-Custom-Model-Files
ParagEkbote Feb 26, 2025
4666bd4
Update docstrings as per code review.
ParagEkbote Feb 27, 2025
dc69cb5
Fix styling of docstrings.
ParagEkbote Feb 27, 2025
66ba545
update docstrings.
ParagEkbote Feb 27, 2025
d671970
Merge branch 'main' into Document-Custom-Model-Files
ParagEkbote Mar 4, 2025
bc777c5
Merge branch 'main' into Document-Custom-Model-Files
ParagEkbote Mar 6, 2025
2 changes: 1 addition & 1 deletion examples/model_configs/peft_model.yaml
@@ -1,6 +1,6 @@
model:
base_params:
model_args: "pretrained=predibase/customer_support,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... For a PEFT model, the pretrained model should be the one trained with PEFT and the base model below will contain the original model on which the adapters will be applied.
model_args: "pretrained=predibase/customer_support,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... For a PEFT model, the pretrained model should be the one trained with PEFT and the base model below will contain the original model on which the adapters will be applied. To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/main/en/package_reference/models#lighteval.models.transformers.adapter_model.AdapterModelConfig
dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization.
compile: true
merged_weights: # Ignore this section if you are not using PEFT models
4 changes: 2 additions & 2 deletions examples/model_configs/quantized_model.yaml
@@ -1,9 +1,9 @@
model:
base_params:
model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ...
model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True. To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/main/en/quicktour#model-arguments .
dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization.
compile: true
merged_weights: # Ignore this section if you are not using PEFT models
merged_weights: # Ignore this section if you are not using PEFT models.
delta_weights: false # set to True if your model should be merged with a base model, also need to provide the base model name
adapter_weights: false # set to True if your model has been trained with peft, also need to provide the base model name
base_model: null # path to the base_model - needs to be specified only if delta_weights or adapter_weights is set to True
2 changes: 1 addition & 1 deletion examples/model_configs/serverless_model.yaml
@@ -1,3 +1,3 @@
model:
base_params:
model_name: "meta-llama/Llama-3.1-8B-Instruct" #Qwen/Qwen2.5-14B" #Qwen/Qwen2.5-7B"
model_name: "meta-llama/Llama-3.1-8B-Instruct" #Qwen/Qwen2.5-14B" #Qwen/Qwen2.5-7B". #To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/package_reference/models#endpoints-based-models
2 changes: 1 addition & 1 deletion examples/model_configs/tgi_model.yaml
@@ -2,4 +2,4 @@ model:
instance:
inference_server_address: ""
inference_server_auth: null
model_id: null # Optional, only required if the TGI container was launched with model_id pointing to a local directory
model_id: null # Optional, only required if the TGI container was launched with model_id pointing to a local directory. To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/package_reference/models#lighteval.models.endpoints.tgi_model.TGIModelConfig
27 changes: 24 additions & 3 deletions src/lighteval/models/endpoints/endpoint_model.py
@@ -97,6 +97,28 @@ def from_path(cls, path: str) -> "ServerlessEndpointModelConfig":

@dataclass
class InferenceEndpointModelConfig:
"""
This class defines the settings used to deploy inference endpoints automatically. (Inference endpoint docs: https://huggingface.co/docs/inference-endpoints/index)

Attributes:
endpoint_name (str, optional): Inference endpoint name (to use at creation or reuse).
model_name (str, optional): The name of the model for inference.
reuse_existing (bool, default: False): Indicates whether to reuse an existing endpoint.
accelerator (str, default: "gpu"): Specifies the type of hardware accelerator.
model_dtype (str, optional): The data type used by the model. Defaults to the framework's choice if None.
vendor (str, default: "aws"): Cloud service provider for hosting the endpoint.
region (str, default: "us-east-1"): Cloud region, chosen based on hardware availability.
instance_size (str, optional): Specifies the size of the instance (e.g., large, xlarge).
instance_type (str, optional): Specifies the type of the instance (e.g., g5.4xlarge).
framework (str, default: "pytorch"): Framework used for inference (e.g., pytorch, tensorflow).
endpoint_type (str, default: "protected"): Security level of the endpoint (e.g., public, protected).
add_special_tokens (bool, default: True): Specifies if special tokens should be added during processing.
revision (str, default: "main"): The Git branch or commit hash of the model.
namespace (str, optional): The namespace under which the endpoint is launched.
image_url (str, optional): Docker image URL for the endpoint.
env_vars (dict, optional): Environment variables for the endpoint.
"""

endpoint_name: str = None
model_name: str = None
reuse_existing: bool = False
@@ -133,7 +155,7 @@ def from_path(cls, path: str) -> "InferenceEndpointModelConfig":
"""Load configuration for inference endpoint model from YAML file path.

Args:
path (`str`): Path of the model configuration YAML file.
path(`str`): Path of the model configuration YAML file.

Returns:
[`InferenceEndpointModelConfig`]: Configuration for inference endpoint model.
@@ -165,8 +187,7 @@ def get_custom_env_vars(self) -> Dict[str, str]:

class InferenceEndpointModel(LightevalModel):
"""InferenceEndpointModels can be used both with the free inference client, or with inference
endpoints, which will use text-generation-inference to deploy your model for the duration of the evaluation.
"""
endpoints, which will use text-generation-inference to deploy your model for the duration of the evaluation."""

def __init__( # noqa: C901
self, config: Union[InferenceEndpointModelConfig, ServerlessEndpointModelConfig], env_config: EnvConfig
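For reference, a minimal Python sketch of constructing an InferenceEndpointModelConfig directly; the model name, instance size, and instance type below are illustrative assumptions taken from the docstring's own examples, not values required by the library.

from lighteval.models.endpoints.endpoint_model import InferenceEndpointModelConfig

# Minimal sketch: configure an auto-deployed inference endpoint (values are illustrative).
config = InferenceEndpointModelConfig(
    model_name="meta-llama/Llama-3.1-8B-Instruct",  # model to deploy
    vendor="aws",                                   # cloud provider
    region="us-east-1",                             # choose a region with GPU availability
    instance_size="xlarge",                         # assumed size label (see docstring examples)
    instance_type="g5.4xlarge",                     # assumed instance type (see docstring examples)
    reuse_existing=False,                           # create a new endpoint rather than reusing one
)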
35 changes: 33 additions & 2 deletions src/lighteval/models/endpoints/openai_model.py
@@ -63,17 +63,48 @@

@dataclass
class OpenAIModelConfig:
"""
Configuration class to create an [`OpenAIModel`], called through the OpenAI API at inference time for evaluation.

Attributes:
model (str): Name or identifier of the OpenAI model to be used for inference.
generation_parameters (GenerationParameters, optional): Parameters for model generation. If not
provided, defaults to a new instance of `GenerationParameters`.
base_url (str, default: "https://api.openai.com/v1"): Base URL of the API endpoint.
api_key (str, optional): API key, read from the `OPENAI_API_KEY` environment variable by default.
"""

model: str
generation_parameters: GenerationParameters = None
base_url: str = "https://api.openai.com/v1"
api_key: str = os.environ.get("OPENAI_API_KEY", None)

def __post_init__(self):
"""
Post-initialization hook that ensures `generation_parameters` is set
to a valid `GenerationParameters` instance; if not provided, a default one is created."""
if not self.generation_parameters:
self.generation_parameters = GenerationParameters()

@classmethod
def from_path(cls, path: str) -> "OpenAIModelConfig":
"""
Creates an instance of `OpenAIModelConfig` from a YAML configuration file.

Loads the model configuration from a given file path and initializes the
`OpenAIModelConfig` with the model name and corresponding `GenerationParameters` parsed
from the file.

Args:
path(str): Path to the YAML configuration file containing the model configuration.

Returns:
OpenAIModelConfig: An instance of `OpenAIModelConfig` with the configuration loaded
from the specified YAML file.

Raises:
FileNotFoundError: If the specified file path does not exist.
KeyError: If required keys are missing in the YAML configuration file.
"""
import yaml

with open(path, "r") as f:
@@ -172,8 +203,8 @@ def greedy_until(
Generates responses using a greedy decoding strategy until certain ending conditions are met.

Args:
requests (list[Request]): list of requests containing the context and ending conditions.
override_bs (int, optional): Override the batch size for generation. Defaults to None.
requests(list[GreedyUntilRequest]): list of requests containing the context and ending conditions.
override_bs(int, optional): Override the batch size for generation. Defaults to None.

Returns:
list[GenerativeResponse]: list of generated responses.
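Below is a minimal sketch of building an OpenAIModelConfig directly; __post_init__ then fills generation_parameters with a default instance. The model name is an assumption for illustration.

from lighteval.models.endpoints.openai_model import OpenAIModelConfig

# Minimal sketch (model name is illustrative); api_key falls back to the
# OPENAI_API_KEY environment variable, as in the dataclass definition.
config = OpenAIModelConfig(model="gpt-4o-mini")

print(config.base_url)               # "https://api.openai.com/v1"
print(config.generation_parameters)  # default GenerationParameters created in __post_init__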
11 changes: 10 additions & 1 deletion src/lighteval/models/endpoints/tgi_model.py
@@ -48,6 +48,15 @@ def divide_chunks(array, n):

@dataclass
class TGIModelConfig:
"""
Provides a streamlined configuration for integrating with Text Generation Inference (TGI) endpoints. For more details, see: https://huggingface.co/docs/text-generation-inference/index

Attributes:
inference_server_address (str, required): Endpoint address of the inference server hosting the model.
inference_server_auth (str, required): Authentication credentials or token required to access the server.
model_id (str, required): Identifier for the model hosted on the inference server.
"""

inference_server_address: str
inference_server_auth: str
model_id: str
@@ -62,7 +71,7 @@ def from_path(cls, path: str) -> "TGIModelConfig":
"""Load configuration for TGI endpoint model from YAML file path.

Args:
path (`str`): Path of the model configuration YAML file.
path(`str`): Path of the model configuration YAML file.

Returns:
[`TGIModelConfig`]: Configuration for TGI endpoint model.
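A minimal sketch of the two ways to build a TGIModelConfig suggested by this diff: direct construction, or from_path with a YAML file shaped like examples/model_configs/tgi_model.yaml above. The server address is an assumed placeholder.

from lighteval.models.endpoints.tgi_model import TGIModelConfig

# Direct construction (address is an illustrative placeholder).
config = TGIModelConfig(
    inference_server_address="http://localhost:8080",  # TGI server endpoint
    inference_server_auth=None,                         # auth token if the server requires one
    model_id=None,  # only needed if TGI was launched with model_id pointing to a local directory
)

# Or load the same fields from a YAML file such as examples/model_configs/tgi_model.yaml.
config = TGIModelConfig.from_path("examples/model_configs/tgi_model.yaml")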
16 changes: 16 additions & 0 deletions src/lighteval/models/transformers/adapter_model.py
@@ -41,6 +41,15 @@

@dataclass
class AdapterModelConfig(TransformersModelConfig):
"""
Manages the configuration of adapter models. Adapter models are designed to extend or adapt a
base model's functionality for specific tasks while keeping most of the base model's parameters frozen.

Attributes:
base_model (str): The name of the parent base model.
This model provides the tokenizer and configuration for the adapter model. Defaults to None if not specified.
"""

# Adapter models have the specificity that they look at the base model (= the parent) for the tokenizer and config
base_model: str = None

@@ -54,11 +63,18 @@ def __post_init__(self):
return super().__post_init__()

def init_configs(self, env_config: EnvConfig):
"""
Initializes the configurations of adapter models.
"""
return self._init_configs(self.base_model, env_config)


class AdapterModel(TransformersModel):
"""Integrates the adapter models with a pre-trained base model.
They are are lightweight layers added to pre-trained models for optimizing performance with minimal changes."""

def _create_auto_tokenizer(self, config: AdapterModelConfig, env_config: EnvConfig) -> PreTrainedTokenizer:
"""Creates and configures the adapter model by applying adapter weights to the base model."""
# By default, we look at the model config for the model stored in `base_model`
# (= the parent model, not the model of interest)
return self._create_auto_tokenizer_with_name(
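To make the adapter workflow concrete, here is a hedged sketch of an AdapterModelConfig: base_model is the field added in this diff, while pretrained is assumed to be the field inherited from TransformersModelConfig that mirrors the pretrained= key in examples/model_configs/peft_model.yaml above; both values are illustrative.

from lighteval.models.transformers.adapter_model import AdapterModelConfig

# Sketch only: the PEFT-trained adapter repository goes in `pretrained` (assumed inherited field)
# and the parent model that supplies the tokenizer/config goes in `base_model` (assumed value below).
config = AdapterModelConfig(
    pretrained="predibase/customer_support",  # adapter weights trained with PEFT
    base_model="mistralai/Mistral-7B-v0.1",   # assumed base model the adapters are applied to
)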
5 changes: 4 additions & 1 deletion src/lighteval/models/transformers/delta_model.py
@@ -38,6 +38,9 @@

@dataclass
class DeltaModelConfig(TransformersModelConfig):
"""This class is used to manage the configuration class for delta models.
They capture the difference between a base model's predictions and actual targets to enhance accuracy."""

# Delta models look at the pretrained (= the delta weights) for the tokenizer and model config
base_model: str = None

@@ -59,7 +62,7 @@ def _create_auto_model(
config: DeltaModelConfig,
env_config: EnvConfig,
) -> AutoModelForCausalLM:
"""Returns a model created by adding the weights of a delta model to a base model."""
"""It returns a model created by adding the weights of a delta model to a base model."""
config.model_parallel, max_memory, device_map = self.init_model_parallel(config.model_parallel)
torch_dtype = _get_dtype(config.dtype, self._config)

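To illustrate what "adding the weights of a delta model to a base model" means, here is a conceptual sketch (not the library's internal implementation) of delta-weight merging.

import torch

def merge_delta_weights(
    base_state: dict[str, torch.Tensor], delta_state: dict[str, torch.Tensor]
) -> dict[str, torch.Tensor]:
    # Conceptual sketch: reconstruct the evaluated model by adding each delta tensor
    # to the matching base-model tensor (assumes identical keys and shapes).
    return {name: base + delta_state[name] for name, base in base_state.items()}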
27 changes: 27 additions & 0 deletions src/lighteval/models/vllm/vllm_model.py
@@ -75,6 +75,33 @@

@dataclass
class VLLMModelConfig:
"""
This class defines the configuration parameters for deploying and running models using the vLLM framework.

Attributes:
pretrained (str, required): The identifier for the pretrained model (e.g., model name or path).
gpu_memory_utilization (float, default: 0.9): Fraction of GPU memory to allocate for the model.
Reduce this value if you encounter memory issues.
revision (str, default: "main"): Specifies the branch or version of the model repository.
dtype (str | None, optional): Data type for computations (e.g., float32, float16, or bfloat16).
Defaults to the model's preset if None.
tensor_parallel_size (int, default: 1): Number of GPUs used for splitting tensors across devices.
pipeline_parallel_size (int, default: 1): Number of GPUs used for pipeline parallelism.
data_parallel_size (int, default: 1): Number of GPUs used for data parallelism.
max_model_length (int | None, optional): Maximum sequence length for the model.
If None, it is inferred automatically. Can be reduced to handle Out-of-Memory (OOM) issues.
swap_space (int, default: 4): Amount of CPU swap space (in GiB) per GPU for offloading.
seed (int, default: 1234): Seed for reproducibility in experiments.
trust_remote_code (bool, default: False): Whether to trust custom code provided by remote repositories.
use_chat_template (bool, default: False): Specifies if chat-specific templates should be used for input formatting.
add_special_tokens (bool, default: True): Indicates whether to add special tokens during tokenization.
multichoice_continuations_start_space (bool, default: True): Adds a space at the beginning of each continuation during
multi-choice generation.
pairwise_tokenization (bool, default: False): Specifies if context and continuation should be tokenized separately or together.
subfolder (str, optional): Path to a specific subfolder in the model repository, if applicable.
temperature (float, default: 0.6): Sampling temperature for stochastic tasks. Ignored for deterministic tasks (set internally to 0).
"""

pretrained: str
gpu_memory_utilization: float = 0.9 # lower this if you are running out of memory
revision: str = "main" # revision of the model
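Finally, a minimal sketch of a VLLMModelConfig using the attributes documented above; the model name and the lowered memory and length values are assumptions chosen to show the OOM-related knobs.

from lighteval.models.vllm.vllm_model import VLLMModelConfig

# Minimal sketch (values are illustrative): gpu_memory_utilization and max_model_length
# are the knobs mentioned above for avoiding out-of-memory errors.
config = VLLMModelConfig(
    pretrained="HuggingFaceH4/zephyr-7b-beta",  # model name or path
    gpu_memory_utilization=0.8,                 # lower this if you run out of GPU memory
    max_model_length=4096,                      # cap sequence length to reduce memory use
    dtype="bfloat16",                           # assumed dtype string
    seed=1234,                                  # reproducibility
)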