-
-
Notifications
You must be signed in to change notification settings - Fork 827
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Caption support #2462
Caption support #2462
Conversation
Some minor diffs to support srts as well. Instead of serving the subtitle files, the subtitle files (vtt,srt) are first parsed and converted to vtts using the diff --git a/internal/api/routes_scene.go b/internal/api/routes_scene.go
index 3ba2ed3d..fc83d5a8 100644
--- a/internal/api/routes_scene.go
+++ b/internal/api/routes_scene.go
@@ -1,6 +1,7 @@
package api
import (
+ "bytes"
"context"
"net/http"
"strconv"
@@ -298,46 +299,49 @@ func (rs sceneRoutes) InteractiveHeatmap(w http.ResponseWriter, r *http.Request)
http.ServeFile(w, r, filepath)
}
-func (rs sceneRoutes) CaptionDE(w http.ResponseWriter, r *http.Request) {
+func (rs sceneRoutes) Caption(w http.ResponseWriter, r *http.Request, lang string) {
s := r.Context().Value(sceneKey).(*models.Scene)
- caption := scene.GetCaptionDEPath(s.Path)
- serveFileNoCache(w, r, caption)
+
+ caption := scene.GetCaptionPath(s.Path, lang, "vtt") // first try reading vtt
+ sub, err := scene.ReadSubs(caption)
+ if err != nil {
+ caption = scene.GetCaptionPath(s.Path, lang, "srt") // switch to srt if vtt wasnt available or invalid
+ sub, err = scene.ReadSubs(caption)
+ }
+
+ if err == nil {
+ var b bytes.Buffer
+ err = sub.WriteToWebVTT(&b)
+ if err == nil {
+ w.Header().Set("Content-Type", "text/vtt")
+ w.Header().Add("Cache-Control", "no-cache")
+ b.WriteTo(w)
+ }
+ return
+ }
+ logger.Debugf("Error while reading subs: %v", err)
}
+func (rs sceneRoutes) CaptionDE(w http.ResponseWriter, r *http.Request) {
+ rs.Caption(w, r, "de")
+}
func (rs sceneRoutes) CaptionEN(w http.ResponseWriter, r *http.Request) {
- s := r.Context().Value(sceneKey).(*models.Scene)
- caption := scene.GetCaptionENPath(s.Path)
- serveFileNoCache(w, r, caption)
+ rs.Caption(w, r, "en")
}
-
func (rs sceneRoutes) CaptionES(w http.ResponseWriter, r *http.Request) {
- s := r.Context().Value(sceneKey).(*models.Scene)
- caption := scene.GetCaptionESPath(s.Path)
- serveFileNoCache(w, r, caption)
+ rs.Caption(w, r, "es")
}
-
func (rs sceneRoutes) CaptionFR(w http.ResponseWriter, r *http.Request) {
- s := r.Context().Value(sceneKey).(*models.Scene)
- caption := scene.GetCaptionFRPath(s.Path)
- serveFileNoCache(w, r, caption)
+ rs.Caption(w, r, "fr")
}
-
func (rs sceneRoutes) CaptionIT(w http.ResponseWriter, r *http.Request) {
- s := r.Context().Value(sceneKey).(*models.Scene)
- caption := scene.GetCaptionITPath(s.Path)
- serveFileNoCache(w, r, caption)
+ rs.Caption(w, r, "it")
}
-
func (rs sceneRoutes) CaptionNL(w http.ResponseWriter, r *http.Request) {
- s := r.Context().Value(sceneKey).(*models.Scene)
- caption := scene.GetCaptionNLPath(s.Path)
- serveFileNoCache(w, r, caption)
+ rs.Caption(w, r, "nl")
}
-
func (rs sceneRoutes) CaptionPT(w http.ResponseWriter, r *http.Request) {
- s := r.Context().Value(sceneKey).(*models.Scene)
- caption := scene.GetCaptionPTPath(s.Path)
- serveFileNoCache(w, r, caption)
+ rs.Caption(w, r, "pt")
}
func (rs sceneRoutes) VttThumbs(w http.ResponseWriter, r *http.Request) { diff --git a/pkg/scene/caption.go b/pkg/scene/caption.go
index 995ea3aa..ca90c1c3 100644
--- a/pkg/scene/caption.go
+++ b/pkg/scene/caption.go
@@ -3,46 +3,19 @@ package scene
import (
"path/filepath"
"strings"
-)
-
-func GetCaptionDEPath(path string) string {
- ext := filepath.Ext(path)
- fn := strings.TrimSuffix(path, ext)
- return fn + ".de.vtt"
-}
-
-func GetCaptionENPath(path string) string {
- ext := filepath.Ext(path)
- fn := strings.TrimSuffix(path, ext)
- return fn + ".en.vtt"
-}
-func GetCaptionESPath(path string) string {
- ext := filepath.Ext(path)
- fn := strings.TrimSuffix(path, ext)
- return fn + ".es.vtt"
-}
-
-func GetCaptionFRPath(path string) string {
- ext := filepath.Ext(path)
- fn := strings.TrimSuffix(path, ext)
- return fn + ".fr.vtt"
-}
-
-func GetCaptionITPath(path string) string {
- ext := filepath.Ext(path)
- fn := strings.TrimSuffix(path, ext)
- return fn + ".it.vtt"
-}
+ "github.com/asticode/go-astisub"
+)
-func GetCaptionNLPath(path string) string {
+// GetCaptionPath generates the path of a subtitle
+// from a given file path wanted language and subtitle sufffix
+func GetCaptionPath(path, lang, suffix string) string {
ext := filepath.Ext(path)
fn := strings.TrimSuffix(path, ext)
- return fn + ".nl.vtt"
+ return fn + "." + lang + "." + suffix
}
-func GetCaptionPTPath(path string) string {
- ext := filepath.Ext(path)
- fn := strings.TrimSuffix(path, ext)
- return fn + ".pt.vtt"
+// ReadSubs reads a subtitles file
+func ReadSubs(path string) (*astisub.Subtitles, error) {
+ return astisub.OpenFile(path)
} diff --git a/pkg/scene/scan.go b/pkg/scene/scan.go
index be2aa48e..c4fd7ce7 100644
--- a/pkg/scene/scan.go
+++ b/pkg/scene/scan.go
@@ -62,6 +62,9 @@ func (scanner *Scanner) ScanExisting(existing file.FileBased, file file.SourceFi
interactive := getInteractive(path)
captioned := getCaption(file.Path())
+ if captioned {
+ logger.Debugf("Found subtitle for file %s", path)
+ }
oldHash := s.GetHash(scanner.FileNamingAlgorithm)
changed := false
@@ -342,33 +345,16 @@ func getInteractive(path string) bool {
}
func getCaption(path string) bool {
- _, err := os.Stat(GetCaptionDEPath(path))
- if err == nil {
- return true
- }
- _, err = os.Stat(GetCaptionENPath(path))
- if err == nil {
- return true
- }
- _, err = os.Stat(GetCaptionESPath(path))
- if err == nil {
- return true
- }
- _, err = os.Stat(GetCaptionFRPath(path))
- if err == nil {
- return true
- }
- _, err = os.Stat(GetCaptionITPath(path))
- if err == nil {
- return true
- }
- _, err = os.Stat(GetCaptionNLPath(path))
- if err == nil {
- return true
- }
- _, err = os.Stat(GetCaptionPTPath(path))
- if err == nil {
- return true
+ langs := []string{"de", "en", "es", "fr", "it", "nl", "pt"}
+ for _, l := range langs {
+ _, err := os.Stat(GetCaptionPath(path, l, "vtt"))
+ if err != nil {
+ _, err = os.Stat(GetCaptionPath(path, l, "srt"))
+ }
+ if err == nil {
+ return true
+ }
+
}
return false
} |
I think we'll need to save the caption file paths to the database so we aren't guessing where to find the caption in |
Pasting input from the channel
And
I think we have a few different options to choose from for the subtitles detection.
In any case only a list of available subtitles and their label/lang should be sent over to videojs. Regarding the issue with live transcoding i think we might need to check if the files are live transcoded or directly streamed. If they are live transcoded then before serving the sub we can use the subtitle library to add a delay (equal to the seek point) to the subtitles. Adding the delay is easy not sure how to do the first part though |
What do you consider a lot of files in a directory? I imagine some scene folders could contain up to 90 supporting files. If we didn’t add the subtitle paths into the DB, would there be another way in For offsetting the subtitles, I think we could use https://github.com/funkyremi/vtt-live-edit. |
I could use an extra set of eyes on this. I'm having issues with the 'vtt-live-edit' library I've imported. It looks like the library was imported properly since the IDE nor the web console is throwing any errors. However, it doesn't look like the calls I'm making to the libraries' functions are not doing anything. It's not clear if I'm missing something or if the library just doesn't work. For example, I made a simple function call to change the subtitle font color to red but the font color was not affected at all. |
pasting from the channel My ts/js knowledge is very limited:-(... From a quick look it seems that the library you added to the PR is meant for video elements and the native vtt support. Afaik videojs uses its own vtt.js library for most cases and doesnt switch to native unless for specific browsers ...
The library you used counts on the ::cue to set the foreground so thats why you dont get the color change var settings = (player as any).textTrackSettings;
settings.setValues({
"color": "#F00",
});
settings.updateDisplay(); instead of your
With some googling i found (in the changelog from version 4.6.0 of videojs) this |
btw can you apply diff --git a/internal/api/routes_scene.go b/internal/api/routes_scene.go
index 706da306..47914085 100644
--- a/internal/api/routes_scene.go
+++ b/internal/api/routes_scene.go
@@ -301,7 +301,7 @@ func (rs sceneRoutes) InteractiveHeatmap(w http.ResponseWriter, r *http.Request)
func (rs sceneRoutes) Caption(w http.ResponseWriter, r *http.Request, lang string) {
s := r.Context().Value(sceneKey).(*models.Scene)
-
+
caption := scene.GetCaptionPath(s.Path, lang, "vtt") // first try reading vtt
sub, err := scene.ReadSubs(caption)
if err != nil {
@@ -315,7 +315,7 @@ func (rs sceneRoutes) Caption(w http.ResponseWriter, r *http.Request, lang strin
if err == nil {
w.Header().Set("Content-Type", "text/vtt")
w.Header().Add("Cache-Control", "no-cache")
- b.WriteTo(w)
+ _, _ = b.WriteTo(w)
}
return
}
diff --git a/pkg/scene/caption.go b/pkg/scene/caption.go
index d971e8fd..ca90c1c3 100644
--- a/pkg/scene/caption.go
+++ b/pkg/scene/caption.go
@@ -18,4 +18,4 @@ func GetCaptionPath(path, lang, suffix string) string {
// ReadSubs reads a subtitles file
func ReadSubs(path string) (*astisub.Subtitles, error) {
return astisub.OpenFile(path)
-}
\ No newline at end of file
+} ? EDIT
My worst case scenario would be a user having a flat directory structure for his collection eg all scenes (or say a big percentage) in a single directory! In that case the readdir and regex method i mentioned above would probably be inneficient for detecting subs. |
According to videojs/video.js#4212 it sounds like your find from version 4.6.0 was later removed. I can try your suggested alternative from discord:
No problem. I can go ahead and apply the change you posted.
Yes, that was what I was thinking. Users who don't use subtitles would always have that detection disabled. Whereas users that do would turn that option on most likely during selective scans if only a few scenes provide them. |
I just tried viewing the captioned scene on my phone and it looks like the VTT library appears to be working there a changed the subtitle text to red. Still, a solution that works everywhere would be ideal. |
What scenario is the filter breaking for you? From my testing, it works the same as how the Studio Alias filter works, which was the intent. If I wanted to find scenes with English filters, I would provide a |
I think it was the is null / not null |
Ah, okay, I thought that might be the case. Couldn't that be solved by making captions nullable in the database? |
is not, excludes, not matches regex are also broken or at least dont work as expected |
Ah okay, I'll go ahead and limit the filter capability as you suggested so we can get this change merged sooner. |
You could try the not nullable you mention. I think the issue is when you join on the language_code column (that is not nullable) Edit: |
Okay, when I get some time, I'll look at the filter and see if it can be fixed with minimal changes. If not, I'll just dumb down the filter. |
I did some testing with the studio alias filter and saw that it also does not work correctly. After looking at the table again, I don't think it makes sense to make |
graphql/schema/types/filters.graphql
Outdated
@@ -174,6 +174,8 @@ input SceneFilterType { | |||
interactive: Boolean | |||
"""Filter by InteractiveSpeed""" | |||
interactive_speed: IntCriterionInput | |||
"""Filter to only include scenes which have captions. `true` or `false`""" | |||
captioned: String |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the values are only true
or false
, then it should be a boolean. I think however, that the filtering should probably be a StringCriterionInput
or something so that one can filter on caption language etc.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was a StringCriterionInput initially similar to the studio alias filter. However, we noticed that approach inherited some issues the studio alias filter has. The current implementation was copied from the has markers
filter, which also uses strings to define a Boolean, but I can look into updating this to use a Boolean.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Having had a quick look, I think the StringCriterionInput
is probably better. I'd prefer to use that and fix the issues arising out of that than have to change the graphql schema later when we figure it out.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good. I'll revert the change to continue using the StringCriterionInput
.
CREATE TABLE `scene_captions` ( | ||
`scene_id` integer, | ||
`language_code` varchar(255) NOT NULL, | ||
`path` varchar(255) NOT NULL, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is path really necessary? Can't we infer the path from the language code and caption type?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I suppose the path could be reconstructed with the language_code and caption type.
@@ -85,6 +85,8 @@ input ScanMetadataInput { | |||
scanGeneratePhashes: Boolean | |||
"""Generate image thumbnails during scan""" | |||
scanGenerateThumbnails: Boolean | |||
"""Detect scene captions""" | |||
scanDetectCaptions: Boolean |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the benefit of making this optional?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this was this intentional. I can update it to be required.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, what I mean is - is there any reason not to have the scan task always detect captions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The detect captions toggle was introduced when the code was still looking for captions by trying various name combinations to see if we could get a match. That approach was expensive, which led to this toggle being added so users without captions wouldn't be impacted by that search when it didn't benefit them. The toggle could probably be removed now that the search code is more intelligent.
Partial revert of 75e8bfb
Perfect, your recent push addresses the regression. Also, I love what you did with the caption filter. |
Fixes #495
This pull request adds caption support for DE, EN, ES, FR, IT, NL, and PT VTT files. As far as I'm aware those are the only languages that are captioned in adult content. This pull request only adds support for VTT files since video.js only supports VTT files but we could support SRT files later by converting the files to VTT on the fly, see videojs/video.js#5923.