Skip to content

Commit

Permalink
Fix Qwen2-Audio-Instruct (#3298)
Browse files Browse the repository at this point in the history
  • Loading branch information
ImKeTT authored Jan 29, 2025
1 parent 2b1f0d7 commit f2e8ee1
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ audiolm =
# For HuggingFace audio datasets
soundfile~=0.12
librosa~=0.10
+ einops~=0.7.0

# For LLaMA-Omni
openai-whisper==20240930
Expand Down
4 changes: 2 additions & 2 deletions src/helm/clients/audio_language/qwen2_audiolm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def make_request(self, request: Request) -> RequestResult:
for media_num, media_object in enumerate(request.multimodal_prompt.media_objects):
if media_object.is_type("audio") and media_object.location:
assert media_object.is_local_file, "Only local audio files are supported"
- query.append({"type": "audio", "audio_loc": media_object.location})
+ query.append({"type": "audio", "audio_url": media_object.location})

prompt_text += f"<|im_start|>user\nAudio {media_num+1}: <|audio_bos|><|AUDIO|><|audio_eos|>\n"
elif media_object.is_type(TEXT_TYPE):
Expand Down Expand Up @@ -131,7 +131,7 @@ def do_it() -> Dict[str, Any]:
if element["type"] == "audio":
audios.append(
librosa.load(
- element["audio_loc"],
+ element["audio_url"],
sr=tokenizer.feature_extractor.sampling_rate,
)[0]
)
Expand Down

0 comments on commit f2e8ee1

Please sign in to comment.