(Experimental) Add support to NTK RoPE scaling (#118)
* (Experimental) Add support to NTK RoPE scaling

* (Experimental) Add support to NTK RoPE scaling

* Revert alpha value to be set manually

* Apply correctly the value when using -a

* Add calculate rotary embedding base correctly on model_init.py

* Remove obsolete code
Panchovix authored Jul 1, 2023
1 parent 93d50d1 commit 8229d55
Showing 2 changed files with 9 additions and 1 deletion.
4 changes: 3 additions & 1 deletion model.py
@@ -72,9 +72,9 @@ def __init__(self, model_config_path):
self.max_input_len = 2048 # Maximum length of input IDs in a single forward pass. Sequences longer than this will be processed in multiple steps
self.max_attention_size = 2048**2 # Sequences will be processed in chunks to keep the size of the attention weights matrix <= this
self.compress_pos_emb = 1.0 # Increase to compress positional embeddings applied to sequence
self.alpha_value = 1.0 # Alpha value for NTK RoPE scaling. Similar to compress_pos_emb; higher values increase context length but add perplexity.
self.gpu_peer_fix = False # Apparently Torch can sometimes have problems transferring tensors directly from one GPU to another. Enable this to explicitly move tensors via system RAM instead, where needed
self.auto_map = None # List of floats with memory allocation in GB, per CUDA device, overrides device_map

# Tuning

self.matmul_recons_thd = 8
@@ -109,6 +109,8 @@ def set_auto_map(self, map_string):
if map_string is None: self.auto_map = None
else: self.auto_map = [float(alloc) for alloc in map_string.split(",")]

def calculate_rotary_embedding_base(self):
self.rotary_embedding_base = self.rotary_embedding_base * self.alpha_value ** (self.head_dim / (self.head_dim-2))

# 4-bit linear layer implementation

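A minimal sketch (not part of the commit) of the NTK-aware base adjustment that calculate_rotary_embedding_base() applies above; head_dim = 128 and a rotary base of 10000 are assumed LLaMA-style defaults for illustration:

def ntk_scaled_base(base: float, alpha: float, head_dim: int) -> float:
    # Raising the rotary base by alpha ** (d / (d - 2)) stretches the RoPE
    # wavelengths, trading a small perplexity increase for a longer usable context.
    return base * alpha ** (head_dim / (head_dim - 2))

# Assumed defaults: base 10000, head_dim 128.
for alpha in (1.0, 2.0, 4.0):
    print(f"alpha={alpha}: base={ntk_scaled_base(10000.0, alpha, 128):.0f}")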
6 changes: 6 additions & 0 deletions model_init.py
@@ -13,6 +13,7 @@ def add_args(parser):
parser.add_argument("-gs", "--gpu_split", type = str, help = "Comma-separated list of VRAM (in GB) to use per GPU device for model layers, e.g. -gs 20,7,7")
parser.add_argument("-l", "--length", type = int, help = "Maximum sequence length", default = 2048)
parser.add_argument("-cpe", "--compress_pos_emb", type = float, help = "Compression factor for positional embeddings", default = 1.0)
parser.add_argument("-a", "--alpha", type = float, help = "alpha for context size extension via embedding extension", default = 1.0)

parser.add_argument("-gpfix", "--gpu_peer_fix", action = "store_true", help = "Prevent direct copies of data between GPUs")

@@ -79,6 +80,9 @@ def print_options(args, extra_options = None):
if args.compress_pos_emb != 1.0:
print(f" -- RoPE compression factor: {args.compress_pos_emb}")

if args.alpha != 1.0:
print(f" -- RoPE alpha factor: {args.alpha}")

print(f" -- Tuning:")
print(f" -- --matmul_recons_thd: {args.matmul_recons_thd}" + (" (disabled)" if args.matmul_recons_thd == 0 else ""))
print(f" -- --fused_mlp_thd: {args.fused_mlp_thd}" + (" (disabled)" if args.fused_mlp_thd == 0 else ""))
@@ -105,6 +109,8 @@ def make_config(args):
config.compress_pos_emb = args.compress_pos_emb
config.set_auto_map(args.gpu_split)
config.gpu_peer_fix = args.gpu_peer_fix
config.alpha_value = args.alpha
config.calculate_rotary_embedding_base()

config.matmul_recons_thd = args.matmul_recons_thd
config.fused_mlp_thd = args.fused_mlp_thd
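For reference, a hypothetical end-to-end sketch of how the pieces added in this commit fit together: the -a/--alpha value is stored on the config and the rotary base is rescaled before the model is loaded. Method and attribute names mirror the diff; the stand-in config class, its defaults, and the sample invocation are assumptions for illustration:

import argparse

class DemoConfig:
    # Stand-in for the config class in model.py, reduced to the fields this commit touches.
    def __init__(self):
        self.head_dim = 128                   # assumed LLaMA head dimension
        self.rotary_embedding_base = 10000.0  # assumed default rotary base
        self.alpha_value = 1.0

    def calculate_rotary_embedding_base(self):
        self.rotary_embedding_base *= self.alpha_value ** (self.head_dim / (self.head_dim - 2))

parser = argparse.ArgumentParser()
parser.add_argument("-a", "--alpha", type=float, default=1.0)
args = parser.parse_args(["-a", "2.0"])       # as if invoked with -a 2.0 on the command line

config = DemoConfig()
config.alpha_value = args.alpha
config.calculate_rotary_embedding_base()
print(config.rotary_embedding_base)           # roughly double the original base for alpha = 2.0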
