Commit

llama 3.1 has correct max_seq_len for all versions (#2203)
akashc1 authored Jan 10, 2025
1 parent b68cddd commit 262122b
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions torchtune/models/llama3_1/_model_builders.py
@@ -73,7 +73,7 @@ def llama3_1_405b() -> TransformerDecoder:
         num_heads=128,
         num_kv_heads=8,
         embed_dim=16384,
-        max_seq_len=8192,
+        max_seq_len=131072,
         intermediate_dim=53248,
         attn_dropout=0.0,
         norm_eps=1e-5,
@@ -236,7 +236,7 @@ def lora_llama3_1_405b(
         num_heads=128,
         num_kv_heads=8,
         embed_dim=16384,
-        max_seq_len=8192,
+        max_seq_len=131072,
         intermediate_dim=53248,
         attn_dropout=0.0,
         norm_eps=1e-5,
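
Both hunks bring the 405B builders in line with Llama 3.1's 128K-token context window (131072 = 128 * 1024), matching the other Llama 3.1 model builders, as the commit title indicates. A minimal sketch of how the corrected value surfaces to a user (an illustrative assumption, not part of the commit; it assumes torchtune is installed and that TransformerDecoder keeps a max_seq_len attribute, and uses PyTorch's meta device so no real 405B-parameter weights are allocated):

import torch
from torchtune.models.llama3_1 import llama3_1_405b

# Build on the meta device: module structure and metadata are created,
# but no parameter memory is materialized for the 405B weights.
with torch.device("meta"):
    model = llama3_1_405b()

# After this commit the builder reports the full 128K context window.
print(model.max_seq_len)  # 131072 == 128 * 1024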
