Add more Float8 description (pytorch#284)
# Summary

Add more of the possible options in the configs, and add a note at the top of the file on how to get the dependency.
drisspg authored Apr 29, 2024
1 parent 0f8eb4c commit 0c9d590
Showing 2 changed files with 15 additions and 2 deletions.
6 changes: 5 additions & 1 deletion torchtitan/config_manager.py
@@ -220,7 +220,11 @@ def __init__(self):
                 "dynamic",
                 "",
             ], # TODO: add "delayed" option back in when supported
-            help="Type of fp8 linear quantization to apply to the model ['', 'dynamic']",
+            help="""
+            Type of fp8 linear quantization to apply to the model ['', 'dynamic'].
+            This feature requires you to install 'float8_experimental' which can be found
+            here: https://github.com/pytorch-labs/float8_experimental
+            """,
         )
         self.parser.add_argument(
             "--training.gc_freq",
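For context, a minimal usage sketch of the new option. It assumes the flag is registered as `--training.fp8_linear` and that `JobConfig` exposes a `parse_args` helper, as elsewhere in this file; neither is shown in the diff above, so treat both as assumptions:

```python
from torchtitan.config_manager import JobConfig

# Hedged sketch: enable dynamic fp8 linear quantization via the CLI flag.
# The flag name and the parse_args helper are assumptions based on the
# surrounding file, not part of this diff.
config = JobConfig()
config.parse_args(["--training.fp8_linear", "dynamic"])

# The parsed value should then be readable as config.training.fp8_linear;
# passing "" (the default) leaves the model unquantized.
assert config.training.fp8_linear == "dynamic"
```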
11 changes: 10 additions & 1 deletion torchtitan/float8_linear.py
@@ -4,6 +4,15 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+# [Note] Getting the 'float8_experimental' package:
+# This script requires the 'float8_experimental' package to function correctly.
+# Please ensure you have this package installed from the appropriate repository.
+# You can obtain it from https://github.com/pytorch-labs/float8_experimental.
+# Either clone and run `pip install .` or run `pip install git+https://github.com/pytorch-labs/float8_experimental.git`
+
+# Note: Performance
+# Float8 experimental is intended to be run under `torch.compile` for competitive performance
+
 import torch.nn as nn
 
 from torchtitan.config_manager import JobConfig
@@ -14,7 +23,7 @@ def build_fp8_linear(model: nn.Module, job_config: JobConfig):
     """
     This function converts the linear layers to one of the fp8 types:
     - Float8DynamicLinear: Dynamic quantization of the weights and the activations
-    - Float8Linear: Uses a history of amaxs to quantize the weights and activations
+    - [Not Yet Supported] Float8Linear: Uses a history of amaxs to quantize the weights and activations
     This will mutate the model inplace.
     """
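To make the performance note concrete, here is a minimal sketch of the convert-then-compile flow. It assumes `float8_experimental`'s `swap_linear_with_float8_linear` utility and `Float8DynamicLinear` class from the repository linked above; the toy model and shapes are illustrative, not from this commit:

```python
import torch
import torch.nn as nn

# Assumed imports from the external dependency; see the install note above.
from float8_experimental.float8_dynamic_linear import Float8DynamicLinear
from float8_experimental.float8_linear_utils import swap_linear_with_float8_linear

# Toy model standing in for a real transformer block (illustrative only).
model = nn.Sequential(nn.Linear(512, 512), nn.ReLU(), nn.Linear(512, 512)).cuda()

# Swap every nn.Linear for its dynamically quantized fp8 counterpart in place.
swap_linear_with_float8_linear(model, Float8DynamicLinear)

# Per the performance note, wrap in torch.compile for competitive speed;
# real fp8 execution additionally requires recent CUDA hardware.
model = torch.compile(model)

out = model(torch.randn(16, 512, device="cuda"))
```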
