-
Notifications
You must be signed in to change notification settings - Fork 236
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improved the ffprobe call caching mechanism by storing the result to the DB and using it for indexing and subtitles search.
- Loading branch information
1 parent
887da10
commit 33e1555
Showing
5 changed files
with
140 additions
and
84 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,68 +1,111 @@ | ||
# coding=utf-8 | ||
|
||
import enzyme | ||
from enzyme.exceptions import MalformedMKVError | ||
import logging | ||
import os | ||
import datetime | ||
import pickle | ||
from knowit import api | ||
from subliminal.cache import region | ||
import enzyme | ||
from enzyme.exceptions import MalformedMKVError | ||
from enzyme.exceptions import MalformedMKVError | ||
from database import database | ||
|
||
|
||
# Unused since the dogpile cache decorator was replaced by the DB-backed cache,
# but kept for backward compatibility with any external reference.
FFPROBE_CACHE_EXPIRATION_TIME = datetime.timedelta(weeks=2).total_seconds()


def embedded_subs_reader(file, file_size, episode_file_id=None, movie_file_id=None):
    """Return the embedded subtitle tracks of a media file.

    Metadata comes from parse_video_metadata(), which prefers ffprobe and
    falls back to the enzyme MKV parser.

    :param file: path to the media file to inspect
    :param file_size: current size of the file on disk (cache validation)
    :param episode_file_id: Sonarr episode file id, if this is an episode
    :param movie_file_id: Radarr movie file id, if this is a movie
    :return: list of [language, forced, hearing_impaired, codec] entries,
        one per embedded subtitle track
    """
    data = parse_video_metadata(file, file_size, episode_file_id, movie_file_id)

    subtitles_list = []
    if data['ffprobe']:
        # Name hints used to tell apart language variants that ffprobe
        # reports under a single ISO 639-2 code.
        traditional_chinese = ["cht", "tc", "traditional", "zht", "hant", "big5", u"繁", u"雙語"]
        brazilian_portuguese = ["pt-br", "pob", "pb", "brazilian", "brasil", "brazil"]

        if 'subtitle' in data['ffprobe']:
            for detected_language in data['ffprobe']['subtitle']:
                if 'language' not in detected_language:
                    continue
                language = detected_language['language'].alpha3
                track_name = detected_language.get('name', '').lower()
                if language == 'zho' and any(hint in track_name for hint in traditional_chinese):
                    language = 'zht'
                elif language == 'por' and any(hint in track_name for hint in brazilian_portuguese):
                    language = 'pob'
                subtitles_list.append([language,
                                       detected_language.get('forced', False),
                                       detected_language.get('hearing_impaired', False),
                                       detected_language.get('format')])
    elif data['enzyme']:
        for subtitle_track in data['enzyme'].subtitle_tracks:
            # Enzyme exposes no hearing-impaired flag; infer it from an
            # "SDH" marker in the track name.
            hearing_impaired = bool(subtitle_track.name and 'sdh' in subtitle_track.name.lower())
            subtitles_list.append([subtitle_track.language, subtitle_track.forced, hearing_impaired,
                                   subtitle_track.codec_id])

    return subtitles_list
|
||
def parse_video_metadata(file, file_size, episode_file_id=None, movie_file_id=None):
    """Get video metadata for a file, using the database as a cache.

    The result is pickled into the ffprobe_cache column of table_episodes /
    table_movies, keyed on the file id and validated against file_size.

    :param file: path to the media file
    :param file_size: current size of the file on disk; a cached entry is
        only reused when its recorded size matches. Used for cache
        identification together with the file ids. DO NOT REMOVE.
    :param episode_file_id: Sonarr episode file id, if this is an episode
    :param movie_file_id: Radarr movie file id, if this is a movie
    :return: dict with keys 'ffprobe', 'enzyme', 'file_id' and 'file_size'
    """
    # Define default data keys value
    data = {
        'ffprobe': {},
        'enzyme': {},
        'file_id': episode_file_id if episode_file_id else movie_file_id,
        'file_size': file_size
    }

    # Get the actual cache value from the database
    if episode_file_id:
        cache_key = database.execute('SELECT ffprobe_cache FROM table_episodes WHERE episode_file_id=? AND file_size=?',
                                     (episode_file_id, file_size), only_one=True)
    elif movie_file_id:
        cache_key = database.execute('SELECT ffprobe_cache FROM table_movies WHERE movie_file_id=? AND file_size=?',
                                     (movie_file_id, file_size), only_one=True)
    else:
        cache_key = None

    # Check if we have a value for that cache key
    if not isinstance(cache_key, dict):
        # NOTE(review): when no matching row exists (e.g. file_size changed
        # or no id was provided) we return empty metadata without probing
        # the file — confirm this early return is intended.
        return data
    else:
        try:
            # Unpickle ffprobe cache. The blob is written by this same
            # function below, so it is trusted data.
            cached_value = pickle.loads(cache_key['ffprobe_cache'])
        except Exception:
            # Missing (NULL) or corrupted cache entry: fall through and
            # probe the file again.
            pass
        else:
            # Check if file size and file id match and if so, we return the cached value
            if cached_value['file_size'] == file_size and cached_value['file_id'] in [episode_file_id, movie_file_id]:
                return cached_value

    # if not, we retrieve the metadata from the file
    from utils import get_binary
    ffprobe_path = get_binary("ffprobe")

    # if we have ffprobe available
    if ffprobe_path:
        api.initialize({'provider': 'ffmpeg', 'ffmpeg': ffprobe_path})
        data['ffprobe'] = api.know(file)
    # if not, we use enzyme for mkv files
    else:
        if os.path.splitext(file)[1] == '.mkv':
            with open(file, 'rb') as f:
                try:
                    mkv = enzyme.MKV(f)
                except MalformedMKVError:
                    logging.error(
                        'BAZARR cannot analyze this MKV with our built-in MKV parser, you should install '
                        'ffmpeg/ffprobe: ' + file)
                else:
                    data['enzyme'] = mkv

    # we write to db the result and return the newly cached metadata dict
    if episode_file_id:
        database.execute('UPDATE table_episodes SET ffprobe_cache=? WHERE episode_file_id=?',
                         (pickle.dumps(data, pickle.HIGHEST_PROTOCOL), episode_file_id))
    elif movie_file_id:
        database.execute('UPDATE table_movies SET ffprobe_cache=? WHERE movie_file_id=?',
                         (pickle.dumps(data, pickle.HIGHEST_PROTOCOL), movie_file_id))
    return data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters