Skip to content

Commit

Permalink
support another language to transcript
Browse files Browse the repository at this point in the history
  • Loading branch information
rizkypujiraharja authored and corny committed Jan 1, 2024
1 parent 2e06f1f commit c38aa71
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 10 deletions.
8 changes: 4 additions & 4 deletions client.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,10 @@ func (c *Client) videoDataByInnertube(ctx context.Context, id string) ([]byte, e
return c.httpPostBodyBytes(ctx, "https://www.youtube.com/youtubei/v1/player?key="+c.client.key, data)
}

func (c *Client) transcriptDataByInnertube(ctx context.Context, id string) ([]byte, error) {
func (c *Client) transcriptDataByInnertube(ctx context.Context, id string, lang string) ([]byte, error) {
data := innertubeRequest{
Context: prepareInnertubeContext(*c.client),
Params: transcriptVideoID(id),
Params: transcriptVideoID(id, lang),
}

return c.httpPostBodyBytes(ctx, "https://www.youtube.com/youtubei/v1/get_transcript?key="+c.client.key, data)
Expand Down Expand Up @@ -261,8 +261,8 @@ func prepareInnertubePlaylistData(ID string, continuation bool, clientInfo clien
}

// transcriptVideoID encodes the video ID and language code into the param used to fetch transcripts.
func transcriptVideoID(videoID string) string {
langCode := encTranscriptLang("en")
func transcriptVideoID(videoID string, lang string) string {
langCode := encTranscriptLang(lang)

// This can be optionally appended to the Sprintf str, not sure what it means
// *3engagement-panel-searchable-transcript-search-panel\x30\x00\x38\x01\x40\x01
Expand Down
8 changes: 4 additions & 4 deletions transcript.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,22 +47,22 @@ func (vt VideoTranscript) String() string {
//
// Not all videos have transcripts, only relatively new videos.
// If transcripts are disabled or not available, ErrTranscriptDisabled is returned.
func (c *Client) GetTranscript(video *Video) (VideoTranscript, error) {
return c.GetTranscriptCtx(context.Background(), video)
func (c *Client) GetTranscript(video *Video, lang string) (VideoTranscript, error) {
return c.GetTranscriptCtx(context.Background(), video, lang)
}

// GetTranscriptCtx fetches the video transcript if available.
//
// Not all videos have transcripts, only relatively new videos.
// If transcripts are disabled or not available, ErrTranscriptDisabled is returned.
func (c *Client) GetTranscriptCtx(ctx context.Context, video *Video) (VideoTranscript, error) {
func (c *Client) GetTranscriptCtx(ctx context.Context, video *Video, lang string) (VideoTranscript, error) {
c.assureClient()

if video == nil || video.ID == "" {
return nil, fmt.Errorf("no video provided")
}

body, err := c.transcriptDataByInnertube(ctx, video.ID)
body, err := c.transcriptDataByInnertube(ctx, video.ID, lang)
if err != nil {
return nil, err
}
Expand Down
24 changes: 23 additions & 1 deletion transcript_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,29 @@ import (
func TestTranscript(t *testing.T) {
video := &Video{ID: "9_MbW9FK1fA"}

transcript, err := testClient.GetTranscript(video)
transcript, err := testClient.GetTranscript(video, "en")
require.NoError(t, err, "get transcript")
require.Greater(t, len(transcript), 0, "no transcript segments found")

for i, segment := range transcript {
index := strconv.Itoa(i)

require.NotEmpty(t, segment.Text, "text "+index)
require.NotEmpty(t, segment.Duration, "duration "+index)
require.NotEmpty(t, segment.OffsetText, "offset "+index)

if i != 0 {
require.NotEmpty(t, segment.StartMs, "startMs "+index)
}
}

t.Log(transcript.String())
}

func TestTranscriptOtherLanguage(t *testing.T) {
video := &Video{ID: "AXwDvYh2-uk"}

transcript, err := testClient.GetTranscript(video, "id")
require.NoError(t, err, "get transcript")
require.Greater(t, len(transcript), 0, "no transcript segments found")

Expand Down
2 changes: 1 addition & 1 deletion video_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func TestSimpleTest(t *testing.T) {
video, err := testClient.GetVideo("https://www.youtube.com/watch?v=9_MbW9FK1fA")
require.NoError(t, err, "get body")

_, err = testClient.GetTranscript(video)
_, err = testClient.GetTranscript(video, "en")
require.NoError(t, err, "get transcript")

// Typically youtube only provides separate streams for video and audio.
Expand Down

0 comments on commit c38aa71

Please sign in to comment.