Commit 85034d4
add notation
CRZbulabula committed Oct 23, 2024
1 parent 51c24c9 commit 85034d4
Showing 2 changed files with 4 additions and 3 deletions.
5 changes: 2 additions & 3 deletions tests/compile/test_basic_correctness.py
@@ -14,9 +14,8 @@
     "model, model_args, pp_size, tp_size, attn_backend, method, fullgraph",
     [
         ("meta-llama/Llama-3.2-1B", [], 2, 2, "FLASH_ATTN", "generate", True),
-        ("nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Dyn-Per-Token-2048-Samples",
-         ["--quantization", "compressed-tensors"
-         ], 1, 1, "FLASH_ATTN", "generate", True),
+        ("THUDM/chatglm3-6b",
+         ["--trust-remote-code"], 1, 1, "FLASH_ATTN", "generate", True),
         ("google/gemma-2-2b-it", [], 1, 2, "FLASHINFER", "generate", True),
         # TODO: add multi-modality test for llava
         ("llava-hf/llava-1.5-7b-hf", [], 2, 1, "FLASHINFER", "generate", False)
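For orientation only (not part of this commit), each tuple above feeds a pytest parametrization roughly shaped like the sketch below; the test name and body are assumptions, not the file's actual contents.

# Hedged sketch of how the parametrized entries are consumed. The test name
# and body are illustrative; only the parameter names and the chatglm entry
# come from the diff above.
import pytest


@pytest.mark.parametrize(
    "model, model_args, pp_size, tp_size, attn_backend, method, fullgraph",
    [
        # Entry added by this commit: ChatGLM3 with remote code enabled,
        # no pipeline/tensor parallelism, FLASH_ATTN, fullgraph compilation.
        ("THUDM/chatglm3-6b",
         ["--trust-remote-code"], 1, 1, "FLASH_ATTN", "generate", True),
    ])
def test_compile_correctness(model, model_args, pp_size, tp_size,
                             attn_backend, method, fullgraph):
    # A real test would launch the model with the given parallel sizes and
    # attention backend and compare compiled output against eager execution.
    ...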
2 changes: 2 additions & 0 deletions vllm/model_executor/models/chatglm.py
@@ -12,6 +12,7 @@
 from torch.nn import LayerNorm
 
 from vllm.attention import Attention, AttentionMetadata
+from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig, LoRAConfig, MultiModalConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.inputs import INPUT_REGISTRY, DecoderOnlyInputs, InputContext
@@ -472,6 +473,7 @@ def forward(
         return hidden_states
 
 
+@support_torch_compile
 class ChatGLMModel(nn.Module):
 
     def __init__(
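The decorator added here, support_torch_compile, marks the model class so that vLLM can run its forward pass under torch.compile rather than purely eagerly. The decorator handles that wiring inside vLLM; the snippet below only illustrates the underlying torch.compile pattern in plain PyTorch and is not vLLM's implementation.

# Plain-PyTorch illustration of the compilation that the decorator enables.
# This is NOT vLLM's implementation, only the underlying torch.compile idea.
import torch
from torch import nn


class TinyBlock(nn.Module):

    def __init__(self, hidden_size: int = 64):
        super().__init__()
        self.proj = nn.Linear(hidden_size, hidden_size)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return torch.relu(self.proj(hidden_states))


model = TinyBlock()
# fullgraph=True mirrors the fullgraph flag used in the test parametrization.
compiled = torch.compile(model, fullgraph=True)
out = compiled(torch.randn(2, 64))  # first call triggers compilation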
