Add solver (ggerganov#4)
* add solver

* update solver

---------

Co-authored-by: syx <[email protected]>
2 people authored and hodlen committed Dec 16, 2023
1 parent 9adba26 commit 1557b81
Showing 1 changed file with 106 additions and 0 deletions.
solver.py: 106 additions & 0 deletions
@@ -0,0 +1,106 @@
#!/usr/bin/env python
# coding=utf-8
import argparse
from cvxopt.glpk import ilp
import numpy as np
from cvxopt import matrix
import torch
import pickle

# Set up command line arguments
parser = argparse.ArgumentParser(description='Optimize neuron activation based on VRAM capacity and other parameters.')
parser.add_argument('--activation_path', type=str, required=True, help='Path to the directory containing activation data.')
parser.add_argument('--neuron', type=int, default=8192*4, help='Total number of neurons in the network.')
parser.add_argument('--capacity', type=int, default=int(8192*4*32*0.1), help='Total VRAM budget for the model, measured in number of neurons.')
parser.add_argument('--layer', type=int, default=59, help='Total number of layers in the neural network.')
parser.add_argument('--batch', type=int, default=32, help='Batch size for processing.')
parser.add_argument('--threshold', type=int, default=512, help='Threshold for splitting a layer across multiple GPUs.')
parser.add_argument('--output', type=str, required=True, help='File path for the output pickle file.')

args = parser.parse_args()

# Assigning command line arguments to variables
activation_path = args.activation_path
neuron = args.neuron
layer = args.layer
batch = args.batch
output_path = args.output

# Processing activation data
values = []
for i in range(layer):
    # Load per-layer activation counts, sort descending, and negate them
    # (the ILP minimizes, so frequently activated neurons get smaller costs);
    # then aggregate into groups of `batch` neurons
    freq = torch.load(f"{activation_path}/activation_{i}.pt")
    freq, _ = torch.sort(freq, descending=True)
    freq = freq * -1.0
    freq = freq.view(-1, batch)
    freq = freq.sum(dim=1)
    freq = freq.tolist()
    values += freq

# Zero objective coefficients for the per-layer indicator variables used by the constraints below
for i in range(layer):
    values += [0.0]
c = np.array(values, dtype=float)
c = matrix(c)

# Work in units of `batch` neurons: scale the VRAM capacity and the per-layer neuron count
CAP = args.capacity
CAP = int(CAP / batch)
neuron = int(neuron / batch)
coeff = []
h = []

# Constraint 1: the total number of selected neuron groups across all layers must not exceed the capacity
lst = []
for i in range(neuron * layer):
    lst.append(1)
for i in range(layer):
    lst.append(0)
coeff.append(lst)
h.append(CAP)

# Constraint 2: whenever a layer's indicator is set, at least threshold/batch of its neuron groups must be selected
for i in range(layer):
    lst = [0] * (neuron * layer + layer)
    for j in range(neuron):
        lst[i * neuron + j] = -1
    lst[neuron * layer + i] = int(args.threshold / batch)
    coeff.append(lst)
    h.append(0)

# Constraint 3: a layer may have neuron groups selected only if its indicator is set (big-M linking)
for i in range(layer):
    lst = [0] * (neuron * layer + layer)
    for j in range(neuron):
        lst[i * neuron + j] = 1
    lst[neuron * layer + i] = -1000000  # big-M coefficient tying the layer's selections to its indicator
    coeff.append(lst)
    h.append(0)

# Convert lists to matrix format for ILP solver
coeff = np.array(coeff, dtype=float)
G = matrix(coeff)
h = np.array(h, dtype=float)
h = matrix(h)

# Define the set of integer and binary variables
I = set(range(neuron * layer + layer))
B = set()

# Solving the ILP problem
(status, x) = ilp(c, G, h, None, None, B, I)
print(f"ILP Status: {status}")
ans = list(x)
print(f"Total Activation Units: {sum(ans)}")

# Convert the per-group solution back to per-layer neuron counts (each group holds `batch` neurons)
serialize = []
for i in range(layer):
    serialize.append(sum(ans[i * neuron:(i + 1) * neuron]) * batch)

aligned_lst = serialize

# Save the solution to a pickle file
with open(output_path, 'wb') as handle:
    pickle.dump(aligned_lst, handle)
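
For reference, the integer program the script assembles can be read as follows, where L is the number of layers, N = neuron / batch is the number of batch-sized neuron groups per layer, a_{i,j} is the batch-summed activation count of group j in layer i, x_{i,j} decides whether that group is placed in VRAM, and y_i is a per-layer indicator. The symbols are for exposition only; all variables are declared integral via the index sets handed to GLPK:

\begin{aligned}
\max_{x,y}\;& \sum_{i=1}^{L}\sum_{j=1}^{N} a_{i,j}\,x_{i,j} && \text{(implemented as minimizing the negated coefficients } c\text{)}\\
\text{s.t.}\;& \sum_{i,j} x_{i,j} \le \mathrm{capacity}/\mathrm{batch}, \\
& (\mathrm{threshold}/\mathrm{batch})\,y_i \le \sum_{j} x_{i,j} && \text{for every layer } i,\\
& \sum_{j} x_{i,j} \le 10^{6}\,y_i && \text{for every layer } i,\\
& x_{i,j},\,y_i \text{ integral.}
\end{aligned}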

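A minimal usage sketch, with illustrative paths and an assumed directory of per-layer activation_{i}.pt files (neither is part of the commit); it shows one way to invoke the solver and read the resulting pickle, which holds one GPU-resident neuron count per layer:

# Hypothetical invocation (paths are illustrative):
#   python solver.py --activation_path ./activations --output ./solution.pkl
import pickle

with open("./solution.pkl", "rb") as handle:  # assumed --output path
    per_layer_gpu_neurons = pickle.load(handle)

# GLPK returns floating-point values, so cast before reporting
for i, count in enumerate(per_layer_gpu_neurons):
    print(f"layer {i}: {int(count)} neurons kept in VRAM")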