Skip to content

Commit

Permalink
[Deepseek r1] update scripts to make sure it runs with default env var (
Browse files Browse the repository at this point in the history
HabanaAI#790)

Signed-off-by: Chendi Xue <[email protected]>
  • Loading branch information
xuechendi authored and yangulei committed Feb 7, 2025
1 parent 33fc2c7 commit d4ad5fb
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 2 deletions.
1 change: 1 addition & 0 deletions examples/offline_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

# Configure Habana / vLLM / Ray behaviour through environment variables.
# NOTE(review): these must be assigned before the libraries that read them are
# initialised — confirm this span sits at the top of the script, right after
# `import os`.
os.environ["HABANA_VISIBLE_DEVICES"] = "ALL"  # presumably exposes every HPU to this process — confirm against Habana runtime docs
os.environ["PT_HPU_ENABLE_LAZY_COLLECTIVES"] = "true"  # PT HPU backend flag; assumed needed for multi-card collectives — TODO confirm
# Skip requantizing the MLA weight matrices to match the activation type:
# costs memory/compute but better preserves the original model's accuracy
# (see the VLLM_MLA_DISABLE_REQUANTIZATION entry in vllm/envs.py).
os.environ["VLLM_MLA_DISABLE_REQUANTIZATION"] = "1"
os.environ["VLLM_RAY_DISABLE_LOG_TO_DRIVER"] = "1"  # keep Ray worker logs off the driver's console
os.environ["RAY_IGNORE_UNHANDLED_ERRORS"] = "1"  # don't let unhandled Ray task errors abort the run

Expand Down
6 changes: 6 additions & 0 deletions scripts/profile.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Run run_example_tp.py under the Habana profiler, writing an .hltv trace
# using the profiler config JSON that sits next to this script.
cur_path=$(pwd)

# All profiler variables must belong to the SAME command prefix as the
# `python` invocation. The previous version ended the first line without a
# trailing `\`, so HABANA_PROFILE_WRITE_HLTV and HABANA_PROFILE were executed
# as a standalone assignments-only command: they became plain (unexported)
# shell variables and were never visible to the Python process.
HABANA_PROFILE_WRITE_HLTV=1 \
HABANA_PROFILE=1 \
VLLM_PT_PROFILE=decode_128_1024_t \
HABANA_PROF_CONFIG="${cur_path}/profile_api_trace_analyzer.json" \
python "${cur_path}/run_example_tp.py"
65 changes: 65 additions & 0 deletions scripts/profile_api_trace_analyzer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
{
"Plugins": [
{
"enable": true,
"lib": "libhost_profiler.so",
"name": "HostProfiler",
"values": {
"start_disabled": {
"value": true
}
}
},
{
"enable": true,
"lib": "libhw_trace.so",
"name": "HwTrace",
"values": {
"archProfileUnits": {
"gaudi3": {
"CS": {
"Gaudi3CSAdvancedProfiling": {
"value": 0
}
},
"NIC": {
"enable": {
"value": true
}
}
},
"gaudi2": {
"FW_events": {
"enable": {
"value": true
}
},
"NIC": {
"enable": {
"value": true
}
}
}
},
"generalOptions": {
"arch": {
"value": "gaudi3"
},
"profilePhase": {
"value": "profileApi"
},
"traceBufferSize": {
"value": "0x8000000"
}
},
"parseOptions": {
"traceAnalyzer": {
"traceAnalyzerXlsx": {
"value": true
}
}
}
}
}
]
}
2 changes: 1 addition & 1 deletion vllm/envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ def maybe_convert_int(value: Optional[str]) -> Optional[int]:
# matrices to match the activation type. This can lead to higher memory and
# compute usage but better preserves the accuracy of the original model.
"VLLM_MLA_DISABLE_REQUANTIZATION":
lambda: bool(int(os.getenv("VLLM_MLA_DISABLE_REQUANTIZATION", "0")))
lambda: bool(int(os.getenv("VLLM_MLA_DISABLE_REQUANTIZATION", "1")))
}

# end-env-vars-definition
Expand Down
2 changes: 1 addition & 1 deletion vllm/model_executor/layers/quantization/fp8.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
def __init__(self, quant_config: Fp8Config):
self.quant_config = quant_config
self.block_quant = self.quant_config.weight_block_size is not None
self.moe_n_slice = int(os.environ.get("VLLM_MOE_N_SLICE", 1))
self.moe_n_slice = int(os.environ.get("VLLM_MOE_N_SLICE", 4))

def create_weights(self, layer: Module, num_experts: int, hidden_size: int,
intermediate_size_per_partition: int,
Expand Down

0 comments on commit d4ad5fb

Please sign in to comment.