Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Always reencode using our presets (even for high quality) and choose best format #373

Merged
merged 1 commit into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Raise exception if there are no videos in the playlists (#347)
- Drop `--type` CLI argument and guess `--id` type (#361)
- Always reencode using our presets (even for high quality) and choose best format when downloading from YouTube (#356)

### Fixed

Expand Down
8 changes: 2 additions & 6 deletions scraper/src/youtube2zim/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@
return True


def post_process_video(video_dir, video_id, preset, video_format, low_quality):
def post_process_video(video_dir, video_id, preset, video_format):

Check warning on line 31 in scraper/src/youtube2zim/processing.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/youtube2zim/processing.py#L31

Added line #L31 was not covered by tests
"""apply custom post-processing to downloaded video

- resize thumbnail
- recompress video if incorrect video_format or low_quality requested"""
- recompress video"""

# find downloaded video from video_dir
files = [
Expand All @@ -52,10 +52,6 @@
)
src_path = files[0]

# don't reencode if not requesting low-quality and received wanted format
if not low_quality and src_path.suffix[1:] == video_format:
return

dst_path = src_path.with_name(f"video.{video_format}")
logger.info(f"Reencode video to {dst_path}")
success, process = reencode(
Expand Down
26 changes: 17 additions & 9 deletions scraper/src/youtube2zim/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,12 @@
from zimscraperlib.image.probing import get_colors, is_hex_color
from zimscraperlib.image.transformation import resize_image
from zimscraperlib.inputs import compute_descriptions
from zimscraperlib.video.presets import VideoMp4Low, VideoWebmLow
from zimscraperlib.video.presets import (

Check warning on line 34 in scraper/src/youtube2zim/scraper.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/youtube2zim/scraper.py#L34

Added line #L34 was not covered by tests
VideoMp4High,
VideoMp4Low,
VideoWebmHigh,
VideoWebmLow,
)
from zimscraperlib.zim import Creator
from zimscraperlib.zim.filesystem import validate_zimfile_creatable
from zimscraperlib.zim.indexing import IndexData
Expand Down Expand Up @@ -586,10 +591,6 @@
self.videos_ids = [*all_videos.keys()] # unpacking so it's subscriptable

def download_video_files(self, max_concurrency):
audext, vidext = {"webm": ("webm", "webm"), "mp4": ("m4a", "mp4")}[
self.video_format
]

# prepare options which are shared with every downloader
options = {
"cachedir": self.videos_dir,
Expand All @@ -607,8 +608,7 @@
# "external_downloader_args": ["--max-tries=20", "--retry-wait=30"],
"outtmpl": str(self.videos_dir.joinpath("%(id)s", "video.%(ext)s")),
"preferredcodec": self.video_format,
"format": f"bestvideo*[ext={vidext}]+bestaudio[ext={audext}]/"
"bestvideo*+bestaudio/best",
"format": "bestvideo*+bestaudio/best",
"y2z_videos_dir": self.videos_dir,
}
if self.all_subtitles:
Expand Down Expand Up @@ -711,7 +711,16 @@
def download_video(self, video_id, options):
"""download the video from cache/youtube and return True if successful"""

preset = {"mp4": VideoMp4Low}.get(self.video_format, VideoWebmLow)()
preset = {

Check warning on line 714 in scraper/src/youtube2zim/scraper.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/youtube2zim/scraper.py#L714

Added line #L714 was not covered by tests
"mp4": VideoMp4Low if self.low_quality else VideoMp4High,
"webm": VideoWebmLow if self.low_quality else VideoWebmHigh,
}.get(self.video_format)
if not preset:
raise Exception(

Check warning on line 719 in scraper/src/youtube2zim/scraper.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/youtube2zim/scraper.py#L719

Added line #L719 was not covered by tests
f"Impossible to find preset for {self.video_format} video format "
f"(low quality: {self.low_quality})"
)
preset = preset()

Check warning on line 723 in scraper/src/youtube2zim/scraper.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/youtube2zim/scraper.py#L723

Added line #L723 was not covered by tests
options_copy = options.copy()
video_location = options_copy["y2z_videos_dir"].joinpath(video_id)
video_path = video_location.joinpath(f"video.{self.video_format}")
Expand Down Expand Up @@ -746,7 +755,6 @@
video_id,
preset,
self.video_format,
self.low_quality,
)
self.add_file_to_zim(
zim_path, video_path, callback=(delete_callback, video_path)
Expand Down