Skip to content

Commit

Permalink
Merge branch 'cz/youtube-transcripts' of https://github.com/langflow-ai/langflow into cz/youtube-transcripts
Browse files (browse the repository at this point in the history)
  • Loading branch information
Cristhianzl committed Feb 12, 2025
2 parents c1be04b + 1a801e6 commit 72040dd
Showing 1 changed file with 26 additions and 34 deletions.
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
import pytest
from unittest.mock import Mock, patch

Check failure on line 1 in src/backend/tests/unit/components/bundles/youtube/test_youtube_transcript_component.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.12)

Ruff (INP001)

src/backend/tests/unit/components/bundles/youtube/test_youtube_transcript_component.py:1:1: INP001 File `src/backend/tests/unit/components/bundles/youtube/test_youtube_transcript_component.py` is part of an implicit namespace package. Add an `__init__.py`.
from youtube_transcript_api import (
TranscriptsDisabled,
NoTranscriptFound,
CouldNotRetrieveTranscript
)

import pytest
from langflow.components.youtube.youtube_transcripts import YouTubeTranscriptsComponent
from langflow.schema import Data, DataFrame, Message
from tests.base import ComponentTestBaseWithoutClient
from youtube_transcript_api import NoTranscriptFound, TranscriptsDisabled


class TestYouTubeTranscriptsComponent(ComponentTestBaseWithoutClient):
Expand Down Expand Up @@ -35,14 +32,8 @@ def file_names_mapping(self):
def mock_transcript_data(self):
    """Return mock transcript data for testing.

    Builds two stand-in transcript chunks. Each chunk is a ``Mock`` that
    exposes ``page_content`` (the chunk text) and a ``metadata`` dict with
    a ``"start_seconds"`` entry (0 and 60 respectively).
    """
    # Exactly two chunks, each listed once: the success tests assert two
    # DataFrame rows and a joined transcript made of these two parts.
    return [
        Mock(page_content="First part of the transcript", metadata={"start_seconds": 0}),
        Mock(page_content="Second part of the transcript", metadata={"start_seconds": 60}),
    ]

def test_basic_setup(self, component_class, default_kwargs):
Expand All @@ -57,11 +48,11 @@ def test_basic_setup(self, component_class, default_kwargs):
def test_get_dataframe_output_success(self, mock_loader, component_class, default_kwargs, mock_transcript_data):
"""Test successful DataFrame output generation."""
mock_loader.from_youtube_url.return_value.load.return_value = mock_transcript_data

component = component_class()
component.set_attributes(default_kwargs)
result = component.get_dataframe_output()

assert isinstance(result, DataFrame)
df = result

Check failure on line 57 in src/backend/tests/unit/components/bundles/youtube/test_youtube_transcript_component.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.12)

Ruff (PD901)

src/backend/tests/unit/components/bundles/youtube/test_youtube_transcript_component.py:57:9: PD901 Avoid using the generic variable name `df` for DataFrames
assert len(df) == 2
Expand All @@ -74,23 +65,23 @@ def test_get_dataframe_output_success(self, mock_loader, component_class, defaul
def test_get_message_output_success(self, mock_loader, component_class, default_kwargs, mock_transcript_data):
    """Check that the Message output carries the first transcript chunk's text."""
    # Make the patched loader's from_youtube_url(...).load() hand back the
    # canned transcript chunks.
    loader_instance = mock_loader.from_youtube_url.return_value
    loader_instance.load.return_value = mock_transcript_data

    transcripts_component = component_class()
    transcripts_component.set_attributes(default_kwargs)
    message = transcripts_component.get_message_output()

    assert isinstance(message, Message)
    assert message.text == "First part of the transcript"

@patch("langflow.components.youtube.youtube_transcripts.YoutubeLoader")
def test_get_data_output_success(self, mock_loader, component_class, default_kwargs, mock_transcript_data):
"""Test successful Data output generation."""
mock_loader.from_youtube_url.return_value.load.return_value = mock_transcript_data

component = component_class()
component.set_attributes(default_kwargs)
result = component.get_data_output()

assert isinstance(result, Data)
assert result.data["video_url"] == default_kwargs["url"]
assert result.data["transcript"] == "First part of the transcript Second part of the transcript"
Expand All @@ -99,27 +90,28 @@ def test_get_data_output_success(self, mock_loader, component_class, default_kwa
@patch("langflow.components.youtube.youtube_transcripts.YoutubeLoader")
def test_transcript_disabled_error(self, mock_loader, component_class, default_kwargs):
"""Test handling of TranscriptsDisabled error."""

# Mock the load method to raise TranscriptsDisabled
def raise_error(*args, **kwargs):

Check failure on line 95 in src/backend/tests/unit/components/bundles/youtube/test_youtube_transcript_component.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.12)

Ruff (ARG001)

src/backend/tests/unit/components/bundles/youtube/test_youtube_transcript_component.py:95:26: ARG001 Unused function argument: `args`

Check failure on line 95 in src/backend/tests/unit/components/bundles/youtube/test_youtube_transcript_component.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.12)

Ruff (ARG001)

src/backend/tests/unit/components/bundles/youtube/test_youtube_transcript_component.py:95:34: ARG001 Unused function argument: `kwargs`
raise TranscriptsDisabled("test123")

Check failure on line 96 in src/backend/tests/unit/components/bundles/youtube/test_youtube_transcript_component.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.12)

Ruff (EM101)

src/backend/tests/unit/components/bundles/youtube/test_youtube_transcript_component.py:96:39: EM101 Exception must not use a string literal, assign to variable first

mock_loader.from_youtube_url.return_value.load.side_effect = raise_error

component = component_class()
component.set_attributes(default_kwargs)

# Test DataFrame output
df_result = component.get_dataframe_output()
assert isinstance(df_result, DataFrame)
assert len(df_result) == 1 # One row for error message
assert "error" in df_result.columns
assert "Failed to get YouTube transcripts" in df_result["error"][0]

# Test Message output
msg_result = component.get_message_output()
assert isinstance(msg_result, Message)
assert "Failed to get YouTube transcripts" in msg_result.text

# Test Data output
data_result = component.get_data_output()
assert isinstance(data_result, Data)
Expand All @@ -132,16 +124,16 @@ def test_no_transcript_found_error(self, mock_loader, component_class, default_k
video_id = "test123"
requested_langs = ["en"]
transcript_data = {"en": {"translationLanguages": []}}

# Mock the load method to raise NoTranscriptFound
def raise_error(*args, **kwargs):

Check failure on line 129 in src/backend/tests/unit/components/bundles/youtube/test_youtube_transcript_component.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.12)

Ruff (ARG001)

src/backend/tests/unit/components/bundles/youtube/test_youtube_transcript_component.py:129:26: ARG001 Unused function argument: `args`

Check failure on line 129 in src/backend/tests/unit/components/bundles/youtube/test_youtube_transcript_component.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.12)

Ruff (ARG001)

src/backend/tests/unit/components/bundles/youtube/test_youtube_transcript_component.py:129:34: ARG001 Unused function argument: `kwargs`
raise NoTranscriptFound(video_id, requested_langs, transcript_data)

mock_loader.from_youtube_url.return_value.load.side_effect = raise_error

component = component_class()
component.set_attributes(default_kwargs)

data_result = component.get_data_output()
assert isinstance(data_result, Data)
assert "error" in data_result.data
Expand All @@ -151,7 +143,7 @@ def test_translation_setting(self, component_class):
"""Test setting different translation languages."""
component = component_class()
test_cases = ["en", "es", "fr", ""]

for lang in test_cases:
component.set_attributes({"url": "https://youtube.com/watch?v=test", "translation": lang})
assert component.translation == lang
Expand All @@ -160,15 +152,15 @@ def test_translation_setting(self, component_class):
def test_empty_transcript_handling(self, mock_loader, component_class, default_kwargs):
    """Test handling of empty transcript response.

    With the patched loader returning no transcript chunks, the Data output
    is expected to carry the "No transcripts found." error alongside an
    empty transcript, and the DataFrame output is expected to have no rows.
    """
    mock_loader.from_youtube_url.return_value.load.return_value = []

    component = component_class()
    component.set_attributes(default_kwargs)

    # Test Data output with empty transcript
    data_result = component.get_data_output()
    assert data_result.data["error"] == "No transcripts found."
    assert data_result.data["transcript"] == ""

    # Test DataFrame output with empty transcript
    # NOTE(review): len() is kept instead of a truthiness check; presumably the
    # result is a pandas-style DataFrame, whose truth value is ambiguous - confirm.
    df_result = component.get_dataframe_output()
    assert len(df_result) == 0

0 comments on commit 72040dd

Please sign in to comment.