Skip to content

Commit

Permalink
minor updates
Browse files — browse the repository at this point in the history
  • Loading branch information
jhakulin committed Feb 9, 2025
1 parent 1c664ae commit 4dcdb41
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 42 deletions.
33 changes: 23 additions & 10 deletions samples/async/sample_realtime_ai_with_keyword_and_vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,12 @@ async def on_response_output_item_added(self, event: ResponseOutputItemAdded) ->
# Properly acquire the lock with 'await' and spread the usage over two lines
await self._lock.acquire() # Wait until the lock is available, then acquire it
try:
self._call_id_to_function_name[call_id] = function_name
logger.debug(f"Registered function call. Call ID: {call_id}, Function Name: {function_name}")
# Only register the call_id if we haven't seen it before
if call_id in self._call_id_to_function_name:
logger.debug(f"Ignoring duplicated function call registration for call_id: {call_id}")
else:
self._call_id_to_function_name[call_id] = function_name
logger.debug(f"Registered new function call. Call ID: {call_id}, Function Name: {function_name}")
finally:
# Ensure the lock is released even if an exception occurs
self._lock.release()
Expand Down Expand Up @@ -371,7 +375,6 @@ async def main():
audio_capture = None

try:

azure_openai_endpoint, api_key, azure_api_version = get_openai_configuration()
if not api_key:
return
Expand Down Expand Up @@ -411,25 +414,35 @@ async def main():
await client.start()

loop = asyncio.get_running_loop()

audio_capture_event_handler = MyAudioCaptureEventHandler(
client=client,
event_handler=event_handler,
event_loop=loop,
)
vad_parameters={

# Set local VAD parameters depending on the VAD model used
if USE_SILERO_VAD_MODEL:
logger.info("Using Silero VAD...")
vad_parameters = {
"sample_rate": 24000,
"chunk_size": 1024,
"window_size_samples": 512,
"threshold": 0.25,
"min_speech_duration": 0.3,
"min_silence_duration": 1.0,
"model_path": str(RESOURCES_DIR / "silero_vad.onnx")
}
else:
logger.info("Using VoiceActivityDetector...")
vad_parameters = {
"sample_rate": 24000,
"chunk_size": 1024,
"window_duration": 1.5,
"silence_ratio": 1.5,
"min_speech_duration": 0.3,
"min_silence_duration": 1.0
}
if USE_SILERO_VAD_MODEL:
logger.info("using Silero VAD...")
vad_parameters["model_path"] = str(RESOURCES_DIR / "silero_vad.onnx")
else:
logger.info("using VoiceActivityDetector...")

# Initialize AudioCapture with the event handler
audio_capture = AudioCapture(
Expand Down
32 changes: 23 additions & 9 deletions samples/async/sample_realtime_ai_with_local_vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,12 @@ async def on_response_output_item_added(self, event: ResponseOutputItemAdded) ->
# Properly acquire the lock with 'await' and spread the usage over two lines
await self._lock.acquire() # Wait until the lock is available, then acquire it
try:
self._call_id_to_function_name[call_id] = function_name
logger.debug(f"Registered function call. Call ID: {call_id}, Function Name: {function_name}")
# Only register the call_id if we haven't seen it before
if call_id in self._call_id_to_function_name:
logger.debug(f"Ignoring duplicated function call registration for call_id: {call_id}")
else:
self._call_id_to_function_name[call_id] = function_name
logger.debug(f"Registered new function call. Call ID: {call_id}, Function Name: {function_name}")
finally:
# Ensure the lock is released even if an exception occurs
self._lock.release()
Expand Down Expand Up @@ -319,25 +323,35 @@ async def main():
await client.start()

loop = asyncio.get_running_loop()

audio_capture_event_handler = MyAudioCaptureEventHandler(
client=client,
event_handler=event_handler,
event_loop=loop,
)
vad_parameters={

# Set local VAD parameters depending on the VAD model used
if USE_SILERO_VAD_MODEL:
logger.info("Using Silero VAD...")
vad_parameters = {
"sample_rate": 24000,
"chunk_size": 1024,
"window_size_samples": 512,
"threshold": 0.25,
"min_speech_duration": 0.3,
"min_silence_duration": 1.0,
"model_path": str(RESOURCES_DIR / "silero_vad.onnx")
}
else:
logger.info("Using VoiceActivityDetector...")
vad_parameters = {
"sample_rate": 24000,
"chunk_size": 1024,
"window_duration": 1.5,
"silence_ratio": 1.5,
"min_speech_duration": 0.3,
"min_silence_duration": 1.0
}
if USE_SILERO_VAD_MODEL:
logger.info("using Silero VAD...")
vad_parameters["model_path"] = str(RESOURCES_DIR / "silero_vad.onnx")
else:
logger.info("using VoiceActivityDetector...")

# Initialize AudioCapture with the event handler
audio_capture = AudioCapture(
Expand Down
8 changes: 6 additions & 2 deletions samples/sample_realtime_ai_text_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,12 @@ def on_response_output_item_added(self, event: ResponseOutputItemAdded):
function_name = event.item.get("name")
if call_id and function_name:
with self._lock:
self._call_id_to_function_name[call_id] = function_name
logger.debug(f"Registered function call. Call ID: {call_id}, Function Name: {function_name}")
# Only register the call_id if we haven't seen it before
if call_id in self._call_id_to_function_name:
logger.debug(f"Ignoring duplicated function call registration for call_id: {call_id}")
else:
self._call_id_to_function_name[call_id] = function_name
logger.debug(f"Registered new function call. Call ID: {call_id}, Function Name: {function_name}")
else:
logger.warning("Function call item missing 'call_id' or 'name' fields.")

Expand Down
30 changes: 22 additions & 8 deletions samples/sample_realtime_ai_with_keyword_and_vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,12 @@ def on_response_output_item_added(self, event: ResponseOutputItemAdded):
function_name = event.item.get("name")
if call_id and function_name:
with self._lock:
self._call_id_to_function_name[call_id] = function_name
logger.debug(f"Registered function call. Call ID: {call_id}, Function Name: {function_name}")
# Only register the call_id if we haven't seen it before
if call_id in self._call_id_to_function_name:
logger.debug(f"Ignoring duplicated function call registration for call_id: {call_id}")
else:
self._call_id_to_function_name[call_id] = function_name
logger.debug(f"Registered new function call. Call ID: {call_id}, Function Name: {function_name}")
else:
logger.warning("Function call item missing 'call_id' or 'name' fields.")

Expand Down Expand Up @@ -390,19 +394,29 @@ def main():
client=client,
event_handler=event_handler
)
vad_parameters={

# Set local VAD parameters depending on the VAD model used
if USE_SILERO_VAD_MODEL:
logger.info("Using Silero VAD...")
vad_parameters = {
"sample_rate": 24000,
"chunk_size": 1024,
"window_size_samples": 512,
"threshold": 0.25,
"min_speech_duration": 0.3,
"min_silence_duration": 1.0,
"model_path": str(RESOURCES_DIR / "silero_vad.onnx")
}
else:
logger.info("Using VoiceActivityDetector...")
vad_parameters = {
"sample_rate": 24000,
"chunk_size": 1024,
"window_duration": 1.5,
"silence_ratio": 1.5,
"min_speech_duration": 0.3,
"min_silence_duration": 1.0
}
if USE_SILERO_VAD_MODEL:
logger.info("using Silero VAD...")
vad_parameters["model_path"] = str(RESOURCES_DIR / "silero_vad.onnx")
else:
logger.info("using VoiceActivityDetector...")

# Initialize AudioCapture with the event handler
audio_capture = AudioCapture(
Expand Down
30 changes: 22 additions & 8 deletions samples/sample_realtime_ai_with_local_vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,12 @@ def on_response_output_item_added(self, event: ResponseOutputItemAdded):
function_name = event.item.get("name")
if call_id and function_name:
with self._lock:
self._call_id_to_function_name[call_id] = function_name
logger.debug(f"Registered function call. Call ID: {call_id}, Function Name: {function_name}")
# Only register the call_id if we haven't seen it before
if call_id in self._call_id_to_function_name:
logger.debug(f"Ignoring duplicated function call registration for call_id: {call_id}")
else:
self._call_id_to_function_name[call_id] = function_name
logger.debug(f"Registered new function call. Call ID: {call_id}, Function Name: {function_name}")
else:
logger.warning("Function call item missing 'call_id' or 'name' fields.")

Expand Down Expand Up @@ -304,19 +308,29 @@ def main():
client=client,
event_handler=event_handler
)
vad_parameters={

# Set local VAD parameters depending on the VAD model used
if USE_SILERO_VAD_MODEL:
logger.info("Using Silero VAD...")
vad_parameters = {
"sample_rate": 24000,
"chunk_size": 1024,
"window_size_samples": 512,
"threshold": 0.25,
"min_speech_duration": 0.3,
"min_silence_duration": 1.0,
"model_path": str(RESOURCES_DIR / "silero_vad.onnx")
}
else:
logger.info("Using VoiceActivityDetector...")
vad_parameters = {
"sample_rate": 24000,
"chunk_size": 1024,
"window_duration": 1.5,
"silence_ratio": 1.5,
"min_speech_duration": 0.3,
"min_silence_duration": 1.0
}
if USE_SILERO_VAD_MODEL:
logger.info("using Silero VAD...")
vad_parameters["model_path"] = str(RESOURCES_DIR / "silero_vad.onnx")
else:
logger.info("using VoiceActivityDetector...")

# Initialize AudioCapture with the event handler
audio_capture = AudioCapture(
Expand Down
4 changes: 2 additions & 2 deletions samples/utils/audio_capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def __init__(

if vad_parameters is not None:
try:
if vad_parameters.get("model_path"):
if "model_path" in vad_parameters and isinstance(vad_parameters["model_path"], str) and vad_parameters["model_path"].strip():
self.vad = SileroVoiceActivityDetector(**vad_parameters)
else:
self.vad = VoiceActivityDetector(**vad_parameters)
Expand Down Expand Up @@ -160,7 +160,7 @@ def start(self):
except Exception as e:
logger.error(f"Failed to start AzureKeywordRecognizer: {e}")

# ensure the pyaudio instance is initialized
# Ensure the PyAudio instance is initialized
if not self.pyaudio_instance:
self.pyaudio_instance = pyaudio.PyAudio()

Expand Down
5 changes: 2 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="realtime-ai",
version="0.1.8",
version="0.1.9",
description="Python SDK for real-time audio processing with OpenAI's Realtime REST API.",
long_description=open("README.md").read(),
long_description_content_type="text/markdown",
Expand All @@ -12,10 +12,9 @@
package_dir={"": "src"},
install_requires=[
"pyaudio",
"numpy",
"numpy>=1.24,<2.2", # Ensures compatibility with numba
"websockets",
"websocket-client",
"azure-cognitiveservices-speech",
],
classifiers=[
"Programming Language :: Python :: 3",
Expand Down

0 comments on commit 4dcdb41

Please sign in to comment.