This repository has been archived by the owner on Dec 31, 2024. It is now read-only.
generated from childmindresearch/template-python-repository
-
Notifications
You must be signed in to change notification settings - Fork 0
Add speech router and transcribe endpoint #23
Merged
Merged
Changes from 1 commit
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"""Endpoint definitions for the speech router.""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
"""Speech router controller.""" | ||
import logging | ||
import pathlib | ||
import tempfile | ||
|
||
import fastapi | ||
import ffmpeg | ||
from fastapi import status | ||
|
||
from linguaweb_api.core import config | ||
from linguaweb_api.microservices import openai | ||
|
||
# Format every upload is converted to before it is sent for transcription.
TARGET_FILE_FORMAT = ".mp3"

# The logger name comes from the shared application settings.
settings = config.get_settings()
LOGGER_NAME = settings.LOGGER_NAME
logger = logging.getLogger(LOGGER_NAME)
|
||
|
||
async def transcribe(audio: fastapi.UploadFile) -> str:
    """Transcribes audio using OpenAI's Whisper.

    The upload is written to a temporary directory and converted to the
    target format before it is handed to the speech-to-text service. The
    conversion is blocking work (file writes and an ffmpeg run), so it is
    executed in a worker thread to keep the event loop free.

    Args:
        audio: The audio file.

    Returns:
        str: The transcription of the audio as a string. The string is
            stripped of newlines and converted to lowercase.

    Raises:
        fastapi.HTTPException: If the audio file has no filename.
    """
    import asyncio

    logger.debug("Transcribing audio.")
    with tempfile.TemporaryDirectory() as temp_dir:
        target_path = pathlib.Path(temp_dir) / f"audio{TARGET_FILE_FORMAT}"
        # Off-load the synchronous conversion so it does not stall the loop.
        await asyncio.to_thread(_convert_audio, audio, temp_dir, target_path)
        # The temporary directory must stay alive until transcription is done.
        return await openai.SpeechToText().run(target_path)
|
||
|
||
def _convert_audio(
    audio: fastapi.UploadFile,
    directory: str,
    target_path: pathlib.Path,
) -> None:
    """Converts the audio to the target format.

    Uploads that already carry the target extension are written to
    ``target_path`` unchanged; anything else is saved inside ``directory``
    and converted with ffmpeg. The extension is compared case-insensitively
    so that e.g. ``.MP3`` is recognised as the target format instead of being
    passed to ffmpeg with an input path that differs from the output path
    only by case.

    Args:
        audio: The audio file.
        directory: The directory to save the audio file to.
        target_path: The path to save the audio file to.

    Raises:
        fastapi.HTTPException: If the audio file has no filename.
    """
    if audio.filename is None:
        raise fastapi.HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="The audio file must have a filename.",
        )

    # Normalise the case so ".MP3" and ".mp3" are treated alike.
    extension = pathlib.Path(audio.filename).suffix.lower()
    if extension == TARGET_FILE_FORMAT:
        logger.debug("Audio is already in the correct format.")
        target_path.write_bytes(audio.file.read())
        return

    logger.debug("Converting audio to correct format.")
    audio_path = pathlib.Path(directory) / f"audio{extension}"
    audio_path.write_bytes(audio.file.read())
    ffmpeg.input(str(audio_path)).output(str(target_path)).run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
"""View definitions for the speech router.""" | ||
import logging | ||
|
||
import fastapi | ||
from fastapi import status | ||
|
||
from linguaweb_api.core import config | ||
from linguaweb_api.routers.speech import controller | ||
|
||
# The logger name comes from the shared application settings.
settings = config.get_settings()
LOGGER_NAME = settings.LOGGER_NAME
logger = logging.getLogger(LOGGER_NAME)

# Every speech endpoint is registered under the "/speech" prefix.
router = fastapi.APIRouter(tags=["speech"], prefix="/speech")
|
||
|
||
@router.post(
    "/transcribe",
    response_model=str,
    status_code=status.HTTP_200_OK,
    summary="Transcribes audio.",
    description="Endpoint that uses OpenAI's Whisper API to transcribe audio.",
    responses={
        status.HTTP_400_BAD_REQUEST: {
            "description": "The audio file must have a filename.",
        },
    },
)
async def transcribe(audio: fastapi.UploadFile = fastapi.File(...)) -> str:
    """Transcribes audio using OpenAI's Whisper API.

    Args:
        audio: The audio file.

    Returns:
        The transcription of the audio as a string.
    """
    logger.debug("Transcribing audio.")
    # Await before logging completion so the log reflects what has happened.
    transcription = await controller.transcribe(audio)
    logger.debug("Transcribed audio.")
    return transcription
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
"""Tests for the speech endpoints.""" | ||
import array | ||
import tempfile | ||
import wave | ||
from collections.abc import Generator | ||
from typing import Any | ||
|
||
import ffmpeg | ||
import pytest | ||
import pytest_mock | ||
from fastapi import status, testclient | ||
|
||
from linguaweb_api.microservices import openai | ||
from tests.endpoint import conftest | ||
|
||
|
||
@pytest.fixture()
def wav_file() -> Generator[str, Any, None]:
    """Yields the path of a temporary wav file holding one second of silence."""
    sample_rate = 44100
    silence = array.array("h", [0] * sample_rate).tobytes()
    with tempfile.NamedTemporaryFile(suffix=".wav") as handle:
        # Leaving the inner block finalises the wav data; the temporary file
        # itself stays open until the fixture is torn down.
        with wave.open(handle, "w") as writer:
            writer.setnchannels(1)
            writer.setsampwidth(2)
            writer.setframerate(sample_rate)
            writer.writeframes(silence)
        yield handle.name
|
||
|
||
@pytest.fixture()
def mp3_file(wav_file: str) -> Generator[str, Any, None]:
    """Yields the path of a temporary mp3 file converted from the wav fixture."""
    with tempfile.NamedTemporaryFile(suffix=".mp3") as handle:
        mp3_path = handle.name
        # The temporary file already exists, so ffmpeg is told to overwrite it.
        conversion = ffmpeg.input(wav_file).output(mp3_path).overwrite_output()
        conversion.run()
        yield mp3_path
|
||
|
||
@pytest.fixture()
def files(wav_file: str, mp3_file: str) -> dict[str, str]:
    """Maps file types to fixture paths.

    Workaround for pytest.mark.parametrize not supporting fixtures: tests
    parametrize over the keys and look the path up here.
    """
    paths_by_type = {"wav": wav_file, "mp3": mp3_file}
    return paths_by_type
|
||
|
||
@pytest.mark.parametrize("file_type", ["wav", "mp3"])
def test_transcribe(
    mocker: pytest_mock.MockerFixture,
    client: testclient.TestClient,
    endpoints: conftest.Endpoints,
    files: dict[str, str],
    file_type: str,
) -> None:
    """Tests the transcribe endpoint."""
    expected_transcription = "Expected transcription"
    mock_stt_run = mocker.patch.object(
        openai.SpeechToText,
        "run",
        return_value=expected_transcription,
    )

    # Open the upload in a context manager so the handle is always closed,
    # even if the request raises.
    with open(files[file_type], "rb") as audio_file:  # noqa: PTH123
        response = client.post(
            endpoints.POST_SPEECH_TRANSCRIBE,
            files={"audio": audio_file},
        )

    mock_stt_run.assert_called_once()
    assert response.status_code == status.HTTP_200_OK
    assert response.json() == expected_transcription
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd do separate run layers for apt and pip/poetry so it can cache when poetry breaks (way more likely than apt breaking)