
Commit

Lazily import HPU-dependent components
kzawora-intel committed Oct 4, 2024
1 parent 38e60f4 commit e62f43d
Showing 2 changed files with 4 additions and 7 deletions.
vllm/executor/hpu_executor.py: 4 changes (1 addition, 3 deletions)

@@ -6,8 +6,6 @@
 import os
 from typing import Any, Dict, List, Optional, Set, Tuple
 
-from vllm_hpu_extension.profiler import HabanaMemoryProfiler
-
 from vllm.executor.executor_base import ExecutorAsyncBase, ExecutorBase
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
@@ -86,7 +84,7 @@ def initialize_cache(self, num_gpu_blocks: int, num_cpu_blocks) -> None:
         # remains to abstract away the device for non-GPU configurations.
         logger.info("# HPU blocks: %d, # CPU blocks: %d", num_gpu_blocks,
                     num_cpu_blocks)
-
+        from vllm_hpu_extension.profiler import HabanaMemoryProfiler
         with HabanaMemoryProfiler() as cache_init_m:
             self.driver_worker.initialize_cache(num_gpu_blocks, num_cpu_blocks)
         msg = f"init_cache_engine took {cache_init_m.get_summary_string()}"
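For context, this hunk is the commit's lazy-import pattern applied to the executor: the vllm_hpu_extension import moves from module scope into initialize_cache, so vllm/executor/hpu_executor.py can be imported on machines that do not have the HPU extension installed. A minimal sketch of the idea follows; the module and function names are hypothetical and the body is abbreviated, so treat it as an illustration rather than the actual vLLM code.

# lazy_hpu_import_sketch.py -- illustrative only; names are hypothetical.

def init_cache_with_profiling(num_gpu_blocks: int, num_cpu_blocks: int) -> None:
    # Importing at the call site defers the dependency: importing the module
    # that defines this function never requires vllm_hpu_extension, and the
    # package is only resolved when the HPU code path actually runs. Python
    # caches it in sys.modules, so repeated calls pay no extra import cost.
    from vllm_hpu_extension.profiler import HabanaMemoryProfiler

    with HabanaMemoryProfiler() as cache_init_m:
        ...  # allocate the HPU and CPU cache blocks here
    print(f"init_cache_engine took {cache_init_m.get_summary_string()}")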
vllm/model_executor/layers/rotary_embedding.py: 7 changes (3 additions, 4 deletions)

@@ -30,10 +30,6 @@
 from vllm.model_executor.custom_op import CustomOp
 from vllm.platforms import current_platform
 
-if current_platform.is_hpu():
-    from vllm_hpu_extension.rotary_embed import (HpuLlama3RotaryEmbedding,
-                                                 HpuRotaryEmbedding)
-
 
 def _rotate_neox(x: torch.Tensor) -> torch.Tensor:
     x1 = x[..., :x.shape[-1] // 2]
@@ -923,6 +919,7 @@ def get_rope(
 
     if rope_scaling is None:
         if current_platform.is_hpu():
+            from vllm_hpu_extension.rotary_embed import HpuRotaryEmbedding
             rotary_emb = HpuRotaryEmbedding(head_size,
                                             rotary_dim,
                                             max_position,
@@ -945,6 +942,8 @@
             original_max_position = rope_scaling[
                 "original_max_position_embeddings"]
             if current_platform.is_hpu():
+                from vllm_hpu_extension.rotary_embed import (
+                    HpuLlama3RotaryEmbedding)
                 rotary_emb = HpuLlama3RotaryEmbedding(
                     head_size,
                     rotary_dim,
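The same pattern applies here: the module-level import block guarded by current_platform.is_hpu() is removed, and each branch of get_rope imports the HPU rotary-embedding class right before constructing it, so importing rotary_embedding.py no longer requires vllm_hpu_extension. A heavily simplified, hypothetical sketch of the resulting dispatch (not the real get_rope signature; the non-HPU fallthrough is elided):

# Hypothetical sketch of the lazy class selection inside get_rope.
from vllm.platforms import current_platform


def pick_hpu_rope_class(uses_llama3_scaling: bool):
    # uses_llama3_scaling is a made-up parameter standing in for the
    # rope_scaling checks performed by the real get_rope.
    if current_platform.is_hpu():
        if uses_llama3_scaling:
            from vllm_hpu_extension.rotary_embed import HpuLlama3RotaryEmbedding
            return HpuLlama3RotaryEmbedding
        from vllm_hpu_extension.rotary_embed import HpuRotaryEmbedding
        return HpuRotaryEmbedding
    # Non-HPU platforms never reach the imports above, so they never need
    # the vllm_hpu_extension package at all.
    return None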
