Skip to content

Commit

Permalink
feat(audio): 改进音频合并功能,支持 OST 设置,提升时间戳精度
Browse files Browse the repository at this point in the history
- 重构了 merge_audio_files 函数,增加了对 OST 设置的支持
- 新增 time_to_seconds 函数,支持多种时间格式的转换
- 修改了 audio_merger 模块的逻辑,根据 OST 设置处理音频
- 更新了 task 模块中的 start_subclip 函数,传入 OST 信息
- 优化了 subtitle 和 video 模块的逻辑,适应新的音频处理方式
  • Loading branch information
linyqh committed Nov 27, 2024
1 parent c03a13d commit 401eb92
Show file tree
Hide file tree
Showing 10 changed files with 562 additions and 408 deletions.
2 changes: 2 additions & 0 deletions app/models/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,8 @@ class VideoClipParams(BaseModel):
custom_position: float = Field(default=70.0, description="自定义位置")

n_threads: Optional[int] = 8 # 线程数,有助于提升视频处理速度
tts_volume: float = 1.0 # TTS音频音量
video_volume: float = 0.1 # 视频原声音量

class VideoTranscriptionRequest(BaseModel):
video_name: str
Expand Down
170 changes: 97 additions & 73 deletions app/services/audio_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,95 +18,119 @@ def check_ffmpeg():
return False


def merge_audio_files(task_id: str, audio_file_paths: List[str], total_duration: int, video_script: list):
def merge_audio_files(task_id: str, audio_files: list, total_duration: float, list_script: list):
"""
合并多个音频文件到一个指定总时长的音频文件中,并生成相应的字幕
:param task_id: 任务ID
:param audio_file_paths: 音频文件路径列表
:param total_duration: 最终音频文件的总时长(秒)
:param video_script: JSON格式的视频脚本
合并音频文件,根据OST设置处理不同的音频轨道
Args:
task_id: 任务ID
audio_files: TTS生成的音频文件列表
total_duration: 总时长
list_script: 完整脚本信息,包含OST设置
Returns:
str: 合并后的音频文件路径
"""
output_dir = utils.task_dir(task_id)

# 检查FFmpeg是否安装
if not check_ffmpeg():
logger.error("错误:FFmpeg未安装。请安装FFmpeg后再运行此脚本。")
return None, None

# 创建一个总时长为total_duration的空白音频
blank_audio = AudioSegment.silent(duration=total_duration * 1000) # pydub使用毫秒
logger.error("FFmpeg未安装,无法合并音频文件")
return None

for audio_path in audio_file_paths:
if not os.path.exists(audio_path):
logger.info(f"警告:文件 {audio_path} 不存在,已跳过。")
continue

# 从文件名中提取时间戳
filename = os.path.basename(audio_path)
start_time, end_time = extract_timestamp(filename)
# 创建一个空的音频片段
final_audio = AudioSegment.silent(duration=total_duration * 1000) # 总时长以毫秒为单位

# 读取音频文件
# 遍历脚本中的每个片段
for segment, audio_file in zip(list_script, audio_files):
try:
audio = AudioSegment.from_mp3(audio_path)
except Exception as e:
logger.error(f"错误:无法读取文件 {audio_path}。错误信息:{str(e)}")
continue

# 将音频插入到空白音频的指定位置
blank_audio = blank_audio.overlay(audio, position=start_time * 1000)
# 加载TTS音频文件
tts_audio = AudioSegment.from_file(audio_file)

# 获取片段的开始和结束时间
start_time, end_time = segment['new_timestamp'].split('-')
start_seconds = utils.time_to_seconds(start_time)
end_seconds = utils.time_to_seconds(end_time)

# 根据OST设置处理音频
if segment['OST'] == 0:
# 只使用TTS音频
final_audio = final_audio.overlay(tts_audio, position=start_seconds * 1000)
elif segment['OST'] == 1:
# 只使用原声(假设原声已经在视频中)
continue
elif segment['OST'] == 2:
# 混合TTS音频和原声
original_audio = AudioSegment.silent(duration=(end_seconds - start_seconds) * 1000)
mixed_audio = original_audio.overlay(tts_audio)
final_audio = final_audio.overlay(mixed_audio, position=start_seconds * 1000)

# 尝试导出为WAV格式
try:
output_file = os.path.join(output_dir, "audio.wav")
blank_audio.export(output_file, format="wav")
logger.info(f"音频合并完成,已保存为 {output_file}")
except Exception as e:
logger.info(f"导出为WAV格式失败,尝试使用MP3格式:{str(e)}")
try:
output_file = os.path.join(output_dir, "audio.mp3")
blank_audio.export(output_file, format="mp3", codec="libmp3lame")
logger.info(f"音频合并完成,已保存为 {output_file}")
except Exception as e:
logger.error(f"导出音频失败:{str(e)}")
return None, None
logger.error(f"处理音频文件 {audio_file} 时出错: {str(e)}")
continue

return output_file
# 保存合并后的音频文件
output_audio_path = os.path.join(utils.task_dir(task_id), "final_audio.mp3")
final_audio.export(output_audio_path, format="mp3")
logger.info(f"合并后的音频文件已保存: {output_audio_path}")

def parse_timestamp(timestamp: str):
"""解析时间戳字符串为秒数"""
# 确保使用冒号作为分隔符
timestamp = timestamp.replace('_', ':')
return time_to_seconds(timestamp)

def extract_timestamp(filename):
"""从文件名中提取开始和结束时间戳"""
# 从 "audio_00_06-00_24.mp3" 这样的格式中提取时间
time_part = filename.split('_', 1)[1].split('.')[0] # 获取 "00_06-00_24" 部分
start_time, end_time = time_part.split('-') # 分割成 "00_06" 和 "00_24"

# 将下划线格式转换回冒号格式
start_time = start_time.replace('_', ':')
end_time = end_time.replace('_', ':')

# 将时间戳转换为秒
start_seconds = time_to_seconds(start_time)
end_seconds = time_to_seconds(end_time)

return start_seconds, end_seconds
return output_audio_path


def time_to_seconds(time_str):
"""将 "00:06" 或 "00_06" 格式转换为总秒数"""
# 确保使用冒号作为分隔符
time_str = time_str.replace('_', ':')
"""
将时间字符串转换为秒数,支持多种格式:
1. 'HH:MM:SS,mmm' (时:分:秒,毫秒)
2. 'MM:SS,mmm' (分:秒,毫秒)
3. 'SS,mmm' (秒,毫秒)
"""
try:
parts = time_str.split(':')
if len(parts) != 2:
logger.error(f"Invalid time format: {time_str}")
return 0
return int(parts[0]) * 60 + int(parts[1])
# 处理毫秒部分
if ',' in time_str:
time_part, ms_part = time_str.split(',')
ms = float(ms_part) / 1000
else:
time_part = time_str
ms = 0

# 分割时间部分
parts = time_part.split(':')

if len(parts) == 3: # HH:MM:SS
h, m, s = map(int, parts)
seconds = h * 3600 + m * 60 + s
elif len(parts) == 2: # MM:SS
m, s = map(int, parts)
seconds = m * 60 + s
else: # SS
seconds = int(parts[0])

return seconds + ms
except (ValueError, IndexError) as e:
logger.error(f"Error parsing time {time_str}: {str(e)}")
return 0
return 0.0


def extract_timestamp(filename):
    """
    Pull the start and end timestamps out of an audio file name.

    The name is expected to look like "<prefix>_<start>-<end>.<ext>", where
    underscores inside each timestamp stand in for colons, for example
    "audio_00_06,500-00_24,800.mp3" (intended result: (6.5, 24.8)).

    Args:
        filename: File name to parse (not a full path; everything before the
            first underscore is discarded).

    Returns:
        tuple: (start_seconds, end_seconds) as produced by time_to_seconds,
        or (0.0, 0.0) when the name cannot be processed. Failures are logged
        rather than raised.
    """
    try:
        # Keep what follows the first underscore, then cut at the first dot:
        # "audio_00_06,500-00_24,800.mp3" -> "00_06,500-00_24,800"
        after_prefix = filename.split('_', 1)[1]
        time_part = after_prefix.partition('.')[0]

        # Exactly one '-' must separate the two timestamps
        start_time, end_time = time_part.split('-')

        # Restore the colon separators that were replaced for the file name
        start_time, end_time = (
            raw.replace('_', ':') for raw in (start_time, end_time)
        )

        # Convert each timestamp to seconds (start first, then end)
        return time_to_seconds(start_time), time_to_seconds(end_time)
    except Exception as e:
        logger.error(f"Error extracting timestamp from {filename}: {str(e)}")
        return 0.0, 0.0


if __name__ == "__main__":
Expand Down
73 changes: 53 additions & 20 deletions app/services/material.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import random
import traceback
from urllib.parse import urlencode
from datetime import datetime

import requests
from typing import List
Expand Down Expand Up @@ -253,34 +254,58 @@ def download_videos(

def time_to_seconds(time_str: str) -> float:
"""
将时间字符串转换为秒数
支持格式:
1. "MM:SS" (分:秒)
2. "SS" (纯秒数)
将时间字符串转换为秒数,支持多种格式:
1. 'HH:MM:SS,mmm' (时:分:秒,毫秒)
2. 'MM:SS' (分:秒)
3. 'SS' (秒)
"""
parts = time_str.split(':')
if len(parts) == 2:
minutes, seconds = map(float, parts)
return minutes * 60 + seconds
return float(time_str)
try:
# 处理毫秒部分
if ',' in time_str:
time_part, ms_part = time_str.split(',')
ms = int(ms_part) / 1000
else:
time_part = time_str
ms = 0

# 根据格式分别处理
parts = time_part.split(':')
if len(parts) == 3: # HH:MM:SS
time_obj = datetime.strptime(time_part, "%H:%M:%S")
seconds = time_obj.hour * 3600 + time_obj.minute * 60 + time_obj.second
elif len(parts) == 2: # MM:SS
time_obj = datetime.strptime(time_part, "%M:%S")
seconds = time_obj.minute * 60 + time_obj.second
else: # SS
seconds = float(time_part)

return seconds + ms
except ValueError as e:
logger.error(f"时间格式错误: {time_str}")
raise ValueError(f"时间格式错误,支持的格式:HH:MM:SS,mmm 或 MM:SS 或 SS") from e


def format_timestamp(seconds: float) -> str:
"""
将秒数转换为 "MM:SS" 格式的时间字符串
将秒数转换为可读的时间格式 (HH:MM:SS,mmm)
"""
minutes = int(seconds) // 60
secs = int(seconds) % 60
return f"{minutes:02d}:{secs:02d}"
hours = int(seconds // 3600)
minutes = int((seconds % 3600) // 60)
seconds_remain = seconds % 60
whole_seconds = int(seconds_remain)
milliseconds = int((seconds_remain - whole_seconds) * 1000)

return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"


def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> dict:
"""
保存剪辑后的视频
Args:
timestamp: 需要裁剪的单个时间戳,支持两种格式:
1. '00:36-00:40' (分:秒-分:秒)
2. 'SS-SS' (秒-秒)
timestamp: 需要裁剪的单个时间戳,支持格式:
1. 'HH:MM:SS,mmm-HH:MM:SS,mmm' (时:分:秒,毫秒)
2. 'MM:SS-MM:SS' (分:秒-分:秒)
3. 'SS-SS' (秒-秒)
origin_video: 原视频路径
save_dir: 存储目录
Expand All @@ -293,7 +318,7 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di
if not os.path.exists(save_dir):
os.makedirs(save_dir)

video_id = f"vid-{timestamp.replace(':', '_')}"
video_id = f"vid-{timestamp.replace(':', '_').replace(',', '-')}"
video_path = f"{save_dir}/{video_id}.mp4"

if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
Expand All @@ -312,12 +337,12 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di

# 验证时间段是否有效
if start >= total_duration:
logger.warning(f"起始时间 {format_timestamp(start)} ({start:.2f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.2f}秒)")
logger.warning(f"起始时间 {format_timestamp(start)} ({start:.3f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.3f}秒)")
video.close()
return {}

if end > total_duration:
logger.warning(f"结束时间 {format_timestamp(end)} ({end:.2f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.2f}秒),将自动调整为视频结尾")
logger.warning(f"结束时间 {format_timestamp(end)} ({end:.3f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.3f}秒),将自动调整为视频结尾")
end = total_duration

if end <= start:
Expand All @@ -332,7 +357,15 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di

try:
# 检查视频是否有音频轨道并写入文件
subclip.write_videofile(video_path, audio=(subclip.audio is not None), logger=None)
subclip.write_videofile(
video_path,
codec='libx264',
audio_codec='aac',
temp_audiofile='temp-audio.m4a',
remove_temp=True,
audio=(subclip.audio is not None),
logger=None
)

# 验证生成的视频文件
if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
Expand Down
Loading

0 comments on commit 401eb92

Please sign in to comment.