From 53d8115212ba8fd22546c9ac55c5c28aedc80e52 Mon Sep 17 00:00:00 2001
From: Benjamin Bossan
Date: Mon, 27 Jan 2025 11:08:56 +0100
Subject: [PATCH] DOC Better document init_lora_weights=False option (#2347)

Resolves #2212

The documentation for the LoraConfig option init_lora_weights is
ambiguous. This PR updates the docstring and help to make it clearer
what this option does.

I also changed the order of the options (True -> False -> Gaussian ->
rest, instead of True -> Gaussian -> False -> rest), as that made more
sense to me.

The remaining parts of the docstring were left untouched, except for
changing line breaks (to better adhere to the 120 chars limit) and
adding missing spaces at the end of a few lines.
---
 src/peft/tuners/lora/config.py | 38 ++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/src/peft/tuners/lora/config.py b/src/peft/tuners/lora/config.py
index b36de0c43a..1bec83f621 100644
--- a/src/peft/tuners/lora/config.py
+++ b/src/peft/tuners/lora/config.py
@@ -235,11 +235,13 @@ class LoraConfig(PeftConfig):
             List of modules apart from adapter layers to be set as trainable and saved in the final checkpoint.
         init_lora_weights (`bool` | `Literal["gaussian", "eva", "olora", "pissa", "pissa_niter_[number of iters]", "corda", "loftq"]`):
             How to initialize the weights of the adapter layers. Passing True (default) results in the default
-            initialization from the reference implementation from Microsoft. Passing 'gaussian' results in Gaussian
-            initialization scaled by the LoRA rank for linear and layers. Setting the initialization to False leads to
-            completely random initialization and is discouraged. Pass `'loftq'` to use LoftQ initialization. Passing
-            `'eva'` results in a data-driven initialization of Explained
-            Variance Adaptation. EVA initalizes LoRA based on the SVD of layer input activations and achieves SOTA
+            initialization from the reference implementation from Microsoft, with the LoRA B weight being set to 0.
+            This means that without further training, the LoRA adapter will be a no-op. Setting the initialization to
+            False leads to random initialization of LoRA A and B, meaning that LoRA is not a no-op before training;
+            this setting is intended for debugging purposes. Passing 'gaussian' results in Gaussian initialization
+            scaled by the LoRA rank for linear layers. Pass `'loftq'` to use LoftQ initialization. Passing `'eva'`
+            results in a data-driven initialization of Explained Variance
+            Adaptation. EVA initializes LoRA based on the SVD of layer input activations and achieves SOTA
             performance due to its ability to adapt to the finetuning data. Pass `'olora'` to use OLoRA
             initialization. Passing `'pissa'` results in the initialization of Principal Singular values and Singular
             vectors Adaptation (PiSSA), which converges more rapidly than LoRA and
@@ -352,17 +354,21 @@ class LoraConfig(PeftConfig):
         default=True,
         metadata={
             "help": (
-                "How to initialize the weights of the LoRA layers. Passing `'True'` (default) results in the default "
-                "initialization from the reference implementation from Microsoft. Passing `'gaussian'` results "
-                "in Gaussian initialization scaled by the LoRA rank for linear and layers. Setting the initialization "
-                "to `'False'` leads to completely random initialization and *is discouraged.*"
-                "Pass `'eva'` results in a data-driven initialization of Explained Variance Adaptation."
-                "Passing `'olora'` results in OLoRA initialization."
-                "Passing `'pissa'` results in PiSSA initialization."
- "Passing `'pissa_niter_[number of iters]'` initiates Fast-SVD-based PiSSA initialization, " - "where [number of iters] indicates the number of subspace iterations to perform fsvd, and must be a nonnegative integer." - "Passing `'corda'` results in CorDA initialization." - "Pass `'loftq'` to use LoftQ initialization" + "How to initialize the weights of the LoRA layers. " + "Passing True (default) results in the default initialization from the reference implementation from " + "Microsoft, with the LoRA B weight being set to 0. This means that without further training, the LoRA " + "adapter will be a no-op. " + "Setting the initialization to False leads to random initialization of LoRA A and B, meaning that LoRA " + "is not a no-op before training; this setting is intended for debugging purposes. " + "Passing `'gaussian'` results in Gaussian initialization scaled by the LoRA rank for linear and layers. " + "Passing `'eva'` results in a data-driven initialization of Explained Variance Adaptation. " + "Passing `'olora'` results in OLoRA initialization. " + "Passing `'pissa'` results in PiSSA initialization. " + "Passing `'pissa_niter_[number of iters]'` initiates Fast-SVD-based PiSSA initialization, where " + "[number of iters] indicates the number of subspace iterations to perform fsvd, and must be a " + "nonnegative integer. " + "Passing `'corda'` results in CorDA initialization. " + "Pass `'loftq'` to use LoftQ initialization." ), }, )