Revert "Merge branch 'main' of github.com:ROCm/vllm" (vllm-project#72)
This reverts commit 2a3cbf9, reversing changes made to 367aa5a.
mawong-amd authored Jun 28, 2024
1 parent 616baa9 commit 596d58c
Showing 5 changed files with 8 additions and 13 deletions.
File renamed without changes.
tests/metrics/test_metrics.py: 14 changes (5 additions & 9 deletions)
@@ -26,7 +26,6 @@ def test_metric_counter_prompt_tokens(
vllm_model = vllm_runner(model,
dtype=dtype,
disable_log_stats=False,
- enforce_eager=True,
gpu_memory_utilization=0.4)
tokenizer = vllm_model.model.get_tokenizer()
prompt_token_counts = [len(tokenizer.encode(p)) for p in example_prompts]
@@ -60,7 +59,6 @@ def test_metric_counter_generation_tokens(
vllm_model = vllm_runner(model,
dtype=dtype,
disable_log_stats=False,
- enforce_eager=True,
gpu_memory_utilization=0.4)
vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
tokenizer = vllm_model.model.get_tokenizer()
@@ -90,7 +88,6 @@ def test_metric_set_tag_model_name(vllm_runner, model: str, dtype: str,
vllm_model = vllm_runner(model,
dtype=dtype,
disable_log_stats=False,
- enforce_eager=True,
gpu_memory_utilization=0.3,
served_model_name=served_model_name)
stat_logger = vllm_model.model.llm_engine.stat_logger
@@ -108,7 +105,7 @@ def test_metric_set_tag_model_name(vllm_runner, model: str, dtype: str,
f"{served_model_name[0]!r}\n"
f"actual: {metrics_tag_content!r}")

"""

@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("max_tokens", [4])
@@ -121,11 +118,11 @@ async def test_async_engine_log_metrics_regression(
max_tokens: int,
disable_log_stats: bool,
) -> None:
" ""
"""
Regression test ensuring async engine generates metrics
when disable_log_stats=False
(see: https://github.com/vllm-project/vllm/pull/4150#pullrequestreview-2008176678)
" ""
"""
engine_args = AsyncEngineArgs(model=model,
dtype=dtype,
disable_log_stats=disable_log_stats)
@@ -144,7 +141,6 @@ async def test_async_engine_log_metrics_regression(
len(example_prompts))


@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("max_tokens", [4])
@@ -162,15 +158,15 @@ def test_engine_log_metrics_regression(
engine = LLMEngine.from_engine_args(engine_args)
for i, prompt in enumerate(example_prompts):
engine.add_request(
f\"request-id-{i}\",
f"request-id-{i}",
prompt,
SamplingParams(max_tokens=max_tokens),
)
while engine.has_unfinished_requests():
engine.step()

assert_metrics(engine, disable_log_stats, len(example_prompts))
"""


def assert_metrics(engine: LLMEngine, disable_log_stats: bool,
num_requests: int) -> None:
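For orientation, the hunks above re-enable the two log-metrics regression tests by stripping the triple-quote wrappers that had disabled them, and drop enforce_eager=True from the vllm_runner calls. Below is a minimal sketch of the driver pattern the synchronous test uses, assembled only from lines visible in this diff; the model name, prompts, and max_tokens value are illustrative placeholders, not taken from the test file.

# Sketch only: assumes the vLLM public API used in the diff above
# (EngineArgs, LLMEngine, SamplingParams are importable from `vllm`).
from vllm import EngineArgs, LLMEngine, SamplingParams

# Build a synchronous engine with stats logging enabled, mirroring the
# disable_log_stats=False case the regression test exercises.
engine_args = EngineArgs(model="facebook/opt-125m",  # placeholder model
                         dtype="half",
                         disable_log_stats=False)
engine = LLMEngine.from_engine_args(engine_args)

example_prompts = ["Hello, my name is", "The capital of France is"]
for i, prompt in enumerate(example_prompts):
    engine.add_request(
        f"request-id-{i}",
        prompt,
        SamplingParams(max_tokens=4),
    )

# Step until every queued request finishes, as the test does before
# asserting on the logged metrics.
while engine.has_unfinished_requests():
    engine.step()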
tests/worker/test_model_runner.py: 4 changes (2 additions & 2 deletions)
@@ -137,7 +137,7 @@ def test_prepare_prompt(batch_size):
dtype=actual.dtype)
torch.testing.assert_close(actual, expected)

"""

@pytest.mark.parametrize("batch_size", list(range(1, 257)))
def test_prepare_decode_cuda_graph(batch_size):
model_runner = _create_model_runner(
@@ -246,7 +246,7 @@ def test_prepare_decode_cuda_graph(batch_size):
device=actual.device,
dtype=actual.dtype)
torch.testing.assert_close(actual, expected)
"""


def test_empty_seq_group():
"""Verify prepare prompt and decode returns empty output."""
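Both hunks in this file simply drop the triple-quote wrapper that had disabled test_prepare_decode_cuda_graph and the code after it. The comparison idiom visible at the edge of each hunk is torch.testing.assert_close; here is a tiny self-contained sketch of that idiom, with arbitrary placeholder tensor values.

import torch

# Stand-in for a tensor produced by the model runner under test.
actual = torch.arange(4, dtype=torch.int32)

# Build the expected tensor on the same device and dtype as the actual
# output, mirroring the pattern in the hunks above.
expected = torch.tensor([0, 1, 2, 3],
                        device=actual.device,
                        dtype=actual.dtype)

# Raises AssertionError with a detailed mismatch report if they differ.
torch.testing.assert_close(actual, expected)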
tests/worker/test_swap.py: 3 changes (1 addition & 2 deletions)
@@ -5,7 +5,7 @@
from vllm.utils import get_distributed_init_method, get_ip, get_open_port
from vllm.worker.worker import Worker

"""

def test_swap() -> None:
# Configure the engine.
engine_args = EngineArgs(model="facebook/opt-125m",
@@ -87,4 +87,3 @@ def test_swap() -> None:
for src, dst in execute_model_req.blocks_to_swap_in:
assert allclose(gpu_key_cache[dst], cpu_key_cache[src])
assert allclose(gpu_value_cache[dst], cpu_value_cache[src])
"""
