-
Notifications
You must be signed in to change notification settings - Fork 488
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* fix gptq tests * simplify ci * enable determinism * fix * add more expected outputs * last one * hopefully * more expected outputs with each run * new one * add evaluation * fix * remove gptq extra * style check
- Loading branch information
1 parent
291f535
commit 9c882fd
Showing
5 changed files
with
166 additions
and
185 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,46 @@ | ||
name: GPTQ Quantization / Test GPU | ||
name: GPTQ / Python - Test | ||
|
||
on: | ||
workflow_dispatch: | ||
schedule: | ||
- cron: 0 1 */3 * * # at 1am every 3 days | ||
push: | ||
branches: [main] | ||
paths: | ||
- tests/gptq/** | ||
- optimum/gptq/** | ||
- .github/workflows/test_gptq.yml | ||
pull_request: | ||
types: [opened, synchronize, reopened, labeled] | ||
# uncomment to enable on PR merge on main branch: | ||
#push: | ||
# branches: | ||
# - main | ||
branches: [main] | ||
paths: | ||
- tests/gptq/** | ||
- optimum/gptq/** | ||
- .github/workflows/test_gptq.yml | ||
schedule: | ||
# every day at midnight | ||
- cron: "0 0 * * *" | ||
|
||
jobs: | ||
do-the-job: | ||
if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }} | ||
name: Start self-hosted EC2 runner | ||
test_gptq: | ||
runs-on: [single-gpu, nvidia-gpu, t4, ci] | ||
env: | ||
AWS_REGION: us-east-1 | ||
|
||
steps: | ||
- name: Checkout | ||
uses: actions/checkout@v2 | ||
- name: Build image | ||
run: | | ||
docker build -f tests/gptq/Dockerfile_quantization_gpu -t gptq-gpu . | ||
- name: Test with unittest within docker container | ||
run: | | ||
docker run --rm --gpus all -v $(pwd)/hf_cache:/root/.cache/huggingface --workdir=/workspace/optimum/tests gptq-gpu:latest | ||
- name: Checkout code | ||
uses: actions/checkout@v4 | ||
|
||
- name: Run tests | ||
uses: addnab/docker-run-action@v3 | ||
with: | ||
image: pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime | ||
# latest auto-gptq was built with pytorch 2.2 and cuda 12.1 | ||
options: | | ||
--rm | ||
--gpus all | ||
--shm-size 16G | ||
--env RUN_SLOW=1 | ||
--env HF_HOME=/mnt/cache/ | ||
--volume /mnt/cache/:/mnt/cache/ | ||
--volume ${{ github.workspace }}:/workspace | ||
--workdir /workspace | ||
run: | | ||
pip install auto-gptq | ||
pip install -e .[tests] | ||
pytest tests/gptq -s -vvvv --durations=0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import torch | ||
import torch.nn as nn | ||
from datasets import load_dataset | ||
from tqdm import tqdm | ||
|
||
|
||
def evaluate_perplexity(model, tokenizer) -> float:
    """Return the perplexity of ``model`` on the WikiText-2 (raw) test split.

    The whole test split is joined with blank lines, tokenized in one pass and
    cut into consecutive, non-overlapping windows of 512 tokens; a trailing
    remainder shorter than one window is dropped. Each window is scored
    independently with next-token cross-entropy.

    Args:
        model: Callable model exposing ``.device`` and ``.eval()``, whose
            output has a ``.logits`` attribute (presumably a causal LM in the
            Hugging Face style -- confirm against callers).
        tokenizer: Callable returning an object with ``input_ids`` when
            invoked with ``return_tensors="pt"``.

    Returns:
        The perplexity as a Python float.

    Raises:
        ValueError: If the tokenized corpus is shorter than one window, so
            there is nothing to evaluate.
    """
    seqlen = 512

    def _perplexity(nlls, n_samples, seqlen):
        # exp(total negative log-likelihood / number of scored tokens)
        return torch.exp(torch.stack(nlls).sum() / (n_samples * seqlen))

    # load and prepare dataset
    data = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
    data = tokenizer("\n\n".join(data["text"]), return_tensors="pt")
    # NOTE(review): assumes input_ids has shape (1, num_tokens) -- the window
    # count below is derived from numel(), which only matches for batch size 1.
    data = data.input_ids.to(model.device)

    model = model.eval()
    n_samples = data.numel() // seqlen
    if n_samples == 0:
        # Fail early with a readable message instead of letting torch.stack()
        # choke on an empty list further down.
        raise ValueError(
            f"Tokenized dataset has {data.numel()} tokens, fewer than one window of {seqlen}."
        )

    # Loop-invariant: build the loss module once instead of once per window.
    loss_fct = nn.CrossEntropyLoss()
    nlls = []

    with tqdm(range(n_samples), desc="Perplexity -") as progress_bar:
        for i in progress_bar:
            # `data` already lives on model.device, so the window needs no
            # further transfer and can serve as both input and label source.
            window = data[:, i * seqlen : (i + 1) * seqlen]
            with torch.no_grad():
                logits = model(window).logits

            # Position t predicts token t + 1: drop the last logit and the
            # first label so the two line up.
            shift_logits = logits[:, :-1, :].contiguous().float()
            shift_labels = window[:, 1:]
            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.reshape(-1))

            # The loss is a mean over the seqlen - 1 scored targets; scaling by
            # seqlen (not seqlen - 1) is kept on purpose so the reported
            # numbers stay identical to the previous implementation.
            nlls.append(loss.float() * seqlen)

            curr_ppl = _perplexity(nlls, i + 1, seqlen)
            progress_bar.set_description(f"Perplexity {curr_ppl:.3f}")

    ppl = _perplexity(nlls, n_samples, seqlen)

    return ppl.item()
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.