此阶段包含以下步骤:
- 1. 生成音频任务
- 2. 生成音频
- 3. 将音频合并到视频中
+ 1. 生成音频任务和分段
+ 2. 提取参考音频
+ 3. 生成和合并音频文件
+ 4. 将最终音频合并到视频中
""", unsafe_allow_html=True)
- if not os.path.exists("output/output_video_with_audio.mp4"):
+ if not os.path.exists(DUB_VIDEO):
if st.button("开始处理音频", key="audio_processing_button"):
process_audio()
st.rerun()
else:
st.success("音频处理完成!你可以在 `output` 文件夹中查看音频文件。")
if load_key("resolution") != "0x0":
- st.video("output/output_video_with_audio.mp4")
+ st.video(DUB_VIDEO)
if st.button("删除配音文件", key="delete_dubbing_files"):
delete_dubbing_files()
st.rerun()
@@ -87,13 +91,16 @@ def audio_processing_section():
def process_audio():
with st.spinner("生成音频任务中"):
- step8_gen_audio_task.gen_audio_task_main()
+ step8_1_gen_audio_task.gen_audio_task_main()
+ step8_2_gen_dub_chunks.gen_dub_chunks()
with st.spinner("提取参考音频中"):
step9_extract_refer_audio.extract_refer_audio_main()
- with st.spinner("生成音频中"):
- step10_gen_audio.process_sovits_tasks()
- with st.spinner("将音频合并到视频中"):
- step11_merge_audio_to_vid.merge_main()
+ with st.spinner("生成所有音频中"):
+ step10_gen_audio.gen_audio()
+ with st.spinner("合并完整音频中"):
+ step11_merge_full_audio.merge_full_audio()
+ with st.spinner("将配音合并到视频中"):
+ step12_merge_dub_to_vid.merge_video_audio()
st.success("音频处理完成!🎇")
st.balloons()
diff --git "a/i18n/\344\270\255\346\226\207/st_components/sidebar_setting.py" "b/i18n/\344\270\255\346\226\207/st_components/sidebar_setting.py"
index 9b743bba..0f64c786 100644
--- "a/i18n/\344\270\255\346\226\207/st_components/sidebar_setting.py"
+++ "b/i18n/\344\270\255\346\226\207/st_components/sidebar_setting.py"
@@ -4,56 +4,54 @@
import streamlit as st
from core.config_utils import update_key, load_key
-def config_text_input(label, key, help=None):
- """通用配置文本输入处理器"""
- value = st.text_input(label, value=load_key(key), help=help)
- if value != load_key(key):
- update_key(key, value)
- return value
+def config_input(label, key, help=None):
+    """Render a text input bound to a config key and persist edits.
+
+    Args:
+        label: Label shown for the text box.
+        key: Config key passed to ``load_key`` / ``update_key``.
+        help: Optional tooltip forwarded to ``st.text_input``.
+
+    Returns:
+        The text currently held by the input box.
+    """
+    # Read the stored value once: it is both the widget default and the
+    # baseline for change detection, so a second load_key call is redundant.
+    stored = load_key(key)
+    val = st.text_input(label, value=stored, help=help)
+    if val != stored:
+        update_key(key, val)
+    return val
def page_setting():
with st.expander("LLM 配置", expanded=True):
- config_text_input("API_KEY", "api.key")
- config_text_input("BASE_URL", "api.base_url", help="API请求的基础URL")
+ config_input("API_KEY", "api.key")
+ config_input("BASE_URL", "api.base_url", help="API请求的基础URL")
- col1, col2 = st.columns([4, 1])
- with col1:
- config_text_input("模型", "api.model")
- with col2:
+ c1, c2 = st.columns([4, 1])
+ with c1:
+ config_input("模型", "api.model")
+ with c2:
if st.button("📡", key="api"):
- if valid_llm_api():
- st.toast("API 密钥有效", icon="✅")
- else:
- st.toast("API 密钥无效", icon="❌")
+                # Probe the API once and reuse the result: calling check_api()
+                # per argument sent two requests per click and could pair a
+                # "valid" message with a failure icon (or vice versa).
+                api_ok = check_api()
+                st.toast("API密钥有效" if api_ok else "API密钥无效",
+                         icon="✅" if api_ok else "❌")
with st.expander("转写和字幕设置", expanded=True):
- col1, col2 = st.columns(2)
- with col1:
- whisper_language_options_dict = {
- "🇺🇸 English": "en",
- "🇨🇳 简体中文": "zh",
- "🇪🇸 Español": "es",
- "🇷🇺 Русский": "ru",
- "🇫🇷 Français": "fr",
- "🇩🇪 Deutsch": "de",
- "🇮🇹 Italiano": "it",
- "🇯🇵 日本語": "ja"
+ c1, c2 = st.columns(2)
+ with c1:
+ langs = {
+ "🇺🇸 English": "en",
+ "🇨🇳 简体中文": "zh",
+ "🇪🇸 Español": "es",
+ "🇷🇺 Русский": "ru",
+ "🇫🇷 Français": "fr",
+ "🇩🇪 Deutsch": "de",
+ "🇮🇹 Italiano": "it",
+ "🇯🇵 日本語": "ja"
}
- selected_whisper_language = st.selectbox(
+ lang = st.selectbox(
"识别语言:",
- options=list(whisper_language_options_dict.keys()),
- index=list(whisper_language_options_dict.values()).index(load_key("whisper.language"))
+ options=list(langs.keys()),
+ index=list(langs.values()).index(load_key("whisper.language"))
)
- if whisper_language_options_dict[selected_whisper_language] != load_key("whisper.language"):
- update_key("whisper.language", whisper_language_options_dict[selected_whisper_language])
+ if langs[lang] != load_key("whisper.language"):
+ update_key("whisper.language", langs[lang])
- with col2:
+ with c2:
target_language = st.text_input("目标语言", value=load_key("target_language"))
if target_language != load_key("target_language"):
update_key("target_language", target_language)
- col1, col2 = st.columns(2)
- with col1:
+ c1, c2 = st.columns(2)
+ with c1:
burn_subtitles = st.toggle("烧录字幕", value=load_key("resolution") != "0x0")
resolution_options = {
@@ -61,7 +59,7 @@ def page_setting():
"360p": "640x360"
}
- with col2:
+ with c2:
if burn_subtitles:
selected_resolution = st.selectbox(
"视频分辨率",
@@ -74,32 +72,53 @@ def page_setting():
if resolution != load_key("resolution"):
update_key("resolution", resolution)
-
- with st.expander("配音设置", expanded=False):
- tts_methods = ["openai_tts", "azure_tts", "gpt_sovits", "fish_tts"]
- selected_tts_method = st.selectbox("TTS 方法", options=tts_methods, index=tts_methods.index(load_key("tts_method")))
+
+ with st.expander("配音设置", expanded=True):
+ tts_methods = ["sf_fish_tts", "openai_tts", "azure_tts", "gpt_sovits", "fish_tts"]
+ selected_tts_method = st.selectbox("TTS方法", options=tts_methods, index=tts_methods.index(load_key("tts_method")))
if selected_tts_method != load_key("tts_method"):
update_key("tts_method", selected_tts_method)
- if selected_tts_method == "openai_tts":
- config_text_input("OpenAI 语音", "openai_tts.voice")
- config_text_input("OpenAI TTS API 密钥", "openai_tts.api_key")
- config_text_input("OpenAI TTS API 基础 URL", "openai_tts.base_url")
+ if selected_tts_method == "sf_fish_tts":
+ config_input("SiliconFlow API密钥", "sf_fish_tts.api_key")
+
+ # Add mode selection dropdown
+ mode_options = {
+ "preset": "preset",
+ "custom": "clone(stable)",
+ "dynamic": "clone(dynamic)"
+ }
+ selected_mode = st.selectbox(
+ "模式选择",
+ options=list(mode_options.keys()),
+ format_func=lambda x: mode_options[x],
+ index=list(mode_options.keys()).index(load_key("sf_fish_tts.mode")) if load_key("sf_fish_tts.mode") in mode_options.keys() else 0
+ )
+ if selected_mode != load_key("sf_fish_tts.mode"):
+ update_key("sf_fish_tts.mode", selected_mode)
+
+ if selected_mode == "preset":
+ config_input("语音", "sf_fish_tts.voice")
+
+ elif selected_tts_method == "openai_tts":
+ config_input("OpenAI语音", "openai_tts.voice")
+ config_input("OpenAI TTS API密钥", "openai_tts.api_key")
+ config_input("OpenAI TTS API基础URL", "openai_tts.base_url")
elif selected_tts_method == "fish_tts":
- config_text_input("Fish TTS API 密钥", "fish_tts.api_key")
- fish_tts_character = st.selectbox("Fish TTS 角色", options=list(load_key("fish_tts.character_id_dict").keys()), index=list(load_key("fish_tts.character_id_dict").keys()).index(load_key("fish_tts.character")))
+ config_input("Fish TTS API密钥", "fish_tts.api_key")
+ fish_tts_character = st.selectbox("Fish TTS角色", options=list(load_key("fish_tts.character_id_dict").keys()), index=list(load_key("fish_tts.character_id_dict").keys()).index(load_key("fish_tts.character")))
if fish_tts_character != load_key("fish_tts.character"):
update_key("fish_tts.character", fish_tts_character)
elif selected_tts_method == "azure_tts":
- config_text_input("Azure 密钥", "azure_tts.key")
- config_text_input("Azure 区域", "azure_tts.region")
- config_text_input("Azure 语音", "azure_tts.voice")
+ config_input("Azure密钥", "azure_tts.key")
+ config_input("Azure区域", "azure_tts.region")
+ config_input("Azure语音", "azure_tts.voice")
elif selected_tts_method == "gpt_sovits":
- st.info("配置 GPT_SoVITS,请参考 Github 主页")
- config_text_input("SoVITS 角色", "gpt_sovits.character")
+ st.info("配置GPT_SoVITS,请参考Github主页")
+ config_input("SoVITS角色", "gpt_sovits.character")
refer_mode_options = {1: "模式1:仅用提供的参考音频", 2: "模式2:仅用视频第1条语音做参考", 3: "模式3:使用视频每一条语音做参考"}
selected_refer_mode = st.selectbox(
@@ -112,9 +131,10 @@ def page_setting():
if selected_refer_mode != load_key("gpt_sovits.refer_mode"):
update_key("gpt_sovits.refer_mode", selected_refer_mode)
-def valid_llm_api():
+def check_api():
try:
- response = ask_gpt("This is a test, response 'message':'success' in json format.", response_json=True, log_title='None')
- return response.get('message') == 'success'
+ resp = ask_gpt("This is a test, response 'message':'success' in json format.",
+ response_json=True, log_title='None')
+ return resp.get('message') == 'success'
except Exception:
return False
diff --git "a/i18n/\344\270\255\346\226\207/\344\270\200\351\224\256\345\220\257\345\212\250.bat" "b/i18n/\344\270\255\346\226\207/\344\270\200\351\224\256\345\220\257\345\212\250.bat"
deleted file mode 100644
index 8fd86efa..00000000
--- "a/i18n/\344\270\255\346\226\207/\344\270\200\351\224\256\345\220\257\345\212\250.bat"
+++ /dev/null
@@ -1,13 +0,0 @@
-@echo off
-cd /d %~dp0
-if exist runtime (
- echo 使用 runtime 文件夹...
- runtime\python.exe -m streamlit run st.py
-) else (
- echo 未找到 runtime 文件夹,使用 conda 环境,若启动失败说明 conda 不在系统环境中...
- call activate videolingo
- python -m streamlit run st.py
- call deactivate
-)
-
-pause
diff --git a/install.py b/install.py
index 90dc6a2b..5bb85a7d 100644
--- a/install.py
+++ b/install.py
@@ -4,33 +4,48 @@
import sys
import zipfile
import shutil
-
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+# Raw string: the banner is full of backslashes that are not escape sequences
+# (a normal literal triggers invalid-escape warnings), and a backslash that
+# ends a line would otherwise be read as a line continuation.
+ascii_logo = r"""
+__ ___ _ _ _
+\ \ / (_) __| | ___ ___ | | (_)_ __ __ _ ___
+ \ \ / /| |/ _` |/ _ \/ _ \| | | | '_ \ / _` |/ _ \
+ \ V / | | (_| | __/ (_) | |___| | | | | (_| | (_) |
+ \_/ |_|\__,_|\___|\___/|_____|_|_| |_|\__, |\___/
+ |___/
+"""
+
def install_package(*packages):
subprocess.check_call([sys.executable, "-m", "pip", "install", *packages])
-install_package("requests", "rich", "ruamel.yaml")
-from pypi_autochoose import main as choose_mirror
-
def check_gpu():
- """Check if NVIDIA GPU is available"""
try:
- # 🔍 Try running nvidia-smi command to detect GPU
subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
return True
except (subprocess.CalledProcessError, FileNotFoundError):
return False
def main():
+ install_package("requests", "rich", "ruamel.yaml")
from rich.console import Console
from rich.panel import Panel
-
+ from rich.box import DOUBLE
console = Console()
+
+ width = max(len(line) for line in ascii_logo.splitlines()) + 4
+ welcome_panel = Panel(
+ ascii_logo,
+ width=width,
+ box=DOUBLE,
+ title="[bold green]🌏[/bold green]",
+ border_style="bright_blue"
+ )
+ console.print(welcome_panel)
+
console.print(Panel.fit("🚀 Starting Installation", style="bold magenta"))
# Configure mirrors
- console.print(Panel("⚙️ Configuring mirrors", style="bold yellow"))
+ from core.pypi_autochoose import main as choose_mirror
choose_mirror()
# Detect system and GPU
@@ -65,6 +80,10 @@ def install_requirements():
subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])
def download_and_extract_ffmpeg():
+ # requires both conda-ffmpeg and ffmpeg.exe
+ console.print(Panel("📦 Installing ffmpeg through conda...", style="cyan"))
+ subprocess.check_call(["conda", "install", "-y", "ffmpeg"])
+
import requests
system = platform.system()
if system == "Windows":
@@ -83,15 +102,15 @@ def download_and_extract_ffmpeg():
print(f"{ffmpeg_exe} already exists")
return
- print("Downloading FFmpeg")
+ console.print(Panel("📦 Downloading FFmpeg...", style="cyan"))
response = requests.get(url)
if response.status_code == 200:
filename = "ffmpeg.zip" if system in ["Windows", "Darwin"] else "ffmpeg.tar.xz"
with open(filename, 'wb') as f:
f.write(response.content)
- print(f"FFmpeg downloaded: {filename}")
+ console.print(Panel(f"FFmpeg downloaded: {filename}", style="cyan"))
- print("Extracting FFmpeg")
+ console.print(Panel("📦 Extracting FFmpeg...", style="cyan"))
if system == "Linux":
import tarfile
with tarfile.open(filename) as tar_ref:
@@ -106,15 +125,15 @@ def download_and_extract_ffmpeg():
zip_ref.extract(file)
shutil.move(os.path.join(*file.split('/')[:-1], os.path.basename(file)), os.path.basename(file))
- print("Cleaning up")
+ console.print(Panel("📦 Cleaning up...", style="cyan"))
os.remove(filename)
if system == "Windows":
for item in os.listdir():
if os.path.isdir(item) and "ffmpeg" in item.lower():
shutil.rmtree(item)
- print("FFmpeg extraction completed")
+ console.print(Panel("FFmpeg extraction completed", style="cyan"))
else:
- print("Failed to download FFmpeg")
+ console.print(Panel("❌ Failed to download FFmpeg", style="red"))
def install_noto_font():
if platform.system() == 'Linux':
diff --git a/pip_setup.py b/pip_setup.py
new file mode 100644
index 00000000..42d5d06e
--- /dev/null
+++ b/pip_setup.py
@@ -0,0 +1,70 @@
+import os
+import subprocess
+import sys
+
+script_dir = os.getcwd()
+
+def run_cmd(cmd, assert_success=False, environment=False, capture_output=False, env=None):
+    """Run a shell command, optionally inside the installer's conda environment.
+
+    Args:
+        cmd: Command line to execute; it is handed to the shell as one string.
+        assert_success: When true, terminate the process if the command
+            returns a non-zero exit status.
+        environment: When true, prefix the command with activation of the
+            conda environment located under ``installer_files/env``.
+        capture_output: Forwarded to ``subprocess.run``.
+        env: Environment mapping forwarded to ``subprocess.run``.
+
+    Returns:
+        The ``subprocess.CompletedProcess`` of the (possibly prefixed) command.
+    """
+    if environment:
+        installer_dir = os.path.join(script_dir, "installer_files")
+        conda_env_path = os.path.join(installer_dir, "env")
+        if sys.platform.startswith("win"):
+            conda_bat_path = os.path.join(installer_dir, "conda", "condabin", "conda.bat")
+            cmd = f'"{conda_bat_path}" activate "{conda_env_path}" >nul && {cmd}'
+        else:
+            conda_sh_path = os.path.join(installer_dir, "conda", "etc", "profile.d", "conda.sh")
+            cmd = f'. "{conda_sh_path}" && conda activate "{conda_env_path}" && {cmd}'
+
+    # shell=True is required because the activation step is chained with "&&".
+    result = subprocess.run(cmd, shell=True, capture_output=capture_output, env=env)
+
+    if assert_success and result.returncode != 0:
+        print(f"Command '{cmd}' failed with exit status code '{result.returncode}'. Exiting...")
+        # Exit non-zero: a bare sys.exit() would report success (status 0).
+        sys.exit(1)
+    return result
+
+def check_env():
+    """Abort unless conda is usable and the active environment is not ``base``.
+
+    Exits the process with status 1 when the ``conda`` command fails inside
+    the installer environment, or when ``CONDA_DEFAULT_ENV`` equals ``base``.
+    """
+    # A zero exit status from a bare "conda" call is taken as proof it works.
+    conda_exist = run_cmd("conda", environment=True, capture_output=True).returncode == 0
+    if not conda_exist:
+        print("Conda is not installed. Exiting...")
+        sys.exit(1)
+
+    # .get() instead of indexing: the variable is unset when the launcher is
+    # started outside any activated conda environment, which used to raise
+    # KeyError here. Only an explicit "base" environment is rejected.
+    if os.environ.get("CONDA_DEFAULT_ENV") == "base":
+        print("Create an environment for this project and activate it. Exiting...")
+        sys.exit(1)
+
+def check_gpu_win():
+    """On Windows with an NVIDIA GPU, require the cuDNN directory on PATH.
+
+    Does nothing on other platforms or when ``nvidia-smi`` cannot be run
+    successfully. Exits with status 1 when a GPU is detected but the cuDNN
+    directory string is not contained in the ``PATH`` environment variable.
+    """
+    on_windows = sys.platform.startswith('win')
+    if not on_windows:
+        return
+
+    CUDNN_PATH = "C:\\Program Files\\NVIDIA\\CUDNN\\v9.3\\bin\\12.6"
+
+    def has_nvidia_gpu():
+        # A missing or failing nvidia-smi is treated as "no GPU".
+        try:
+            subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
+        except (subprocess.CalledProcessError, FileNotFoundError):
+            return False
+        return True
+
+    if not has_nvidia_gpu():
+        return
+
+    search_path = os.environ.get('PATH', '')
+    if CUDNN_PATH in search_path:
+        print("✅ CUDNN found in system PATH - All good!")
+        return
+
+    print("🚨 Warning: CUDNN path not found in system environment!")
+    print(f"⚡ Please add the following path to system PATH:\n{CUDNN_PATH}")
+    sys.exit(1)
+
+def install_dependencies():
+    """Run ``install.py`` inside the project's conda environment; abort on failure."""
+    install_cmd = "python install.py"
+    run_cmd(install_cmd, environment=True, assert_success=True)
+
+def run_model():
+    """Launch the Streamlit app (``st.py``) inside the project's conda environment."""
+    # Plain literal: the former f-string contained no placeholders.
+    run_cmd("python -m streamlit run st.py", environment=True)
+
+if __name__ == "__main__":
+ # Startup sequence: validate the conda setup, run install.py, check the
+ # Windows cuDNN PATH entry, then launch the Streamlit app.
+ check_env()
+ install_dependencies()
+ check_gpu_win()
+ run_model()
diff --git a/pypi_autochoose.py b/pypi_autochoose.py
deleted file mode 100644
index 0623508b..00000000
--- a/pypi_autochoose.py
+++ /dev/null
@@ -1,140 +0,0 @@
-import subprocess
-import time
-import requests
-import os
-import concurrent.futures
-from rich.console import Console
-from rich.table import Table
-from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn
-import sys
-
-MIRRORS = {
- "Tsinghua University": "https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple",
- "PyPI Official": "https://pypi.org/simple"
-}
-
-console = Console()
-
-FAST_THRESHOLD = 1000 # ms
-SLOW_THRESHOLD = 1500 # ms
-
-def get_optimal_thread_count():
- try:
- cpu_count = os.cpu_count()
- return max(cpu_count - 1, 1)
- except:
- return 2
-
-def test_mirror_speed(name, url):
- try:
- start_time = time.time()
- response = requests.get(url, timeout=5)
- end_time = time.time()
- if response.status_code == 200:
- speed = (end_time - start_time) * 1000
- return name, speed
- else:
- return name, float('inf')
- except requests.RequestException:
- return name, float('inf')
-
-def set_pip_mirror(url):
- try:
- subprocess.run([sys.executable, "-m", "pip", "config", "set", "global.index-url", url],
- check=True,
- capture_output=True)
- return True
- except subprocess.CalledProcessError as e:
- print(f"Failed to set pip mirror: {e}")
- return False
-
-def get_current_pip_mirror():
- try:
- result = subprocess.run([sys.executable, "-m", "pip", "config", "get", "global.index-url"],
- capture_output=True, text=True, check=True)
- return result.stdout.strip()
- except subprocess.CalledProcessError:
- return None
-
-def main():
- console.print("[yellow]Starting new mirror speed test[/yellow]")
-
- # First test PyPI official mirror
- pypi_name = next(name for name, url in MIRRORS.items() if "pypi.org" in url)
- pypi_url = MIRRORS[pypi_name]
- console.print("[cyan]Testing PyPI official mirror...[/cyan]")
-
- optimal_thread_count = get_optimal_thread_count()
- console.print(f"Using {optimal_thread_count} threads for testing")
-
- _, pypi_speed = test_mirror_speed(pypi_name, pypi_url)
-
- if pypi_speed < FAST_THRESHOLD:
- console.print(f"PyPI official mirror is fast ({pypi_speed:.2f} ms). Using the official mirror.")
- set_pip_mirror(pypi_url)
- return
- elif pypi_speed < SLOW_THRESHOLD:
- console.print(f"PyPI official mirror speed is acceptable ({pypi_speed:.2f} ms). You may continue using it.")
- return
-
- console.print(f"PyPI official mirror is slow ({pypi_speed:.2f} ms). Testing other mirrors...")
-
- # Test other mirrors
- speeds = {}
- with Progress(
- SpinnerColumn(),
- TextColumn("[progress.description]{task.description}"),
- BarColumn(),
- TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
- ) as progress:
- task = progress.add_task("[cyan]Testing mirrors...", total=len(MIRRORS) - 1) # -1 because we already tested PyPI
-
- with concurrent.futures.ThreadPoolExecutor(max_workers=optimal_thread_count) as executor:
- future_to_mirror = {executor.submit(test_mirror_speed, name, url): name for name, url in MIRRORS.items() if name != pypi_name}
- for future in concurrent.futures.as_completed(future_to_mirror):
- name = future_to_mirror[future]
- try:
- name, speed = future.result()
- if speed != float('inf'):
- speeds[name] = speed
- except Exception as exc:
- print(f'{name} generated an exception: {exc}')
- finally:
- progress.update(task, advance=1)
-
- table = Table(title="Mirror Speed Test Results")
- table.add_column("Mirror", style="cyan")
- table.add_column("Response Time (ms)", justify="right", style="magenta")
-
- for name, speed in sorted(speeds.items(), key=lambda x: x[1]):
- table.add_row(name, f"{speed:.2f}")
-
- console.print(table)
-
- if speeds:
- fastest_mirror = min(speeds, key=speeds.get)
- fastest_url = MIRRORS[fastest_mirror]
- console.print(f"\n[green]Fastest mirror: {fastest_mirror} ({fastest_url})[/green]")
- console.print(f"[green]Response time: {speeds[fastest_mirror]:.2f} ms[/green]")
-
- host = fastest_url.split("//")[1].split("/")[0]
- if set_pip_mirror(fastest_url):
- current_mirror = get_current_pip_mirror()
- console.print(f"\n[yellow]Current pip source: {current_mirror}[/yellow]")
-
- if current_mirror == fastest_url:
- console.print(f"[bold green]Successfully switched to {fastest_mirror} mirror.[/bold green]")
- else:
- console.print("[bold red]Switch failed. Current pip source doesn't match the expected one.[/bold red]")
- console.print(f"[yellow]Expected pip source: {fastest_url}[/yellow]")
- console.print("[yellow]Please check the configuration manually or try running this script with administrator privileges.[/yellow]")
- else:
- console.print("[bold red]Failed to switch mirror, will continue using the current source.[/bold red]")
- current_mirror = get_current_pip_mirror()
- console.print(f"[yellow]Current pip source: {current_mirror}[/yellow]")
- console.print("[yellow]Please check if you have sufficient permissions to modify pip configuration.[/yellow]")
- else:
- console.print("[bold red]All mirrors are unreachable. Please check your network connection.[/bold red]")
-
-if __name__ == "__main__":
- main()
diff --git a/requirements.txt b/requirements.txt
index 600c1228..287d5daf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,4 +18,8 @@ yt-dlp
json-repair
ruamel.yaml
autocorrect-py
-demucs[dev] @ git+https://github.com/adefossez/demucs
\ No newline at end of file
+demucs[dev] @ git+https://github.com/adefossez/demucs
+
+syllables
+pypinyin
+g2p-en
diff --git a/st.py b/st.py
index 5eaa2988..951ae1a5 100644
--- a/st.py
+++ b/st.py
@@ -10,6 +10,9 @@
st.set_page_config(page_title="VideoLingo", page_icon="docs/logo.svg")
+SUB_VIDEO = "output/output_sub.mp4"
+DUB_VIDEO = "output/output_dub.mp4"
+
def text_processing_section():
st.header("Translate and Generate Subtitles")
with st.container(border=True):
@@ -25,13 +28,13 @@ def text_processing_section():
6. Merging subtitles into the video
""", unsafe_allow_html=True)
- if not os.path.exists("output/output_video_with_subs.mp4"):
+ if not os.path.exists(SUB_VIDEO):
if st.button("Start Processing Subtitles", key="text_processing_button"):
process_text()
st.rerun()
else:
if load_key("resolution") != "0x0":
- st.video("output/output_video_with_subs.mp4")
+ st.video(SUB_VIDEO)
download_subtitle_zip_button(text="Download All Srt Files")
if st.button("Archive to 'history'", key="cleanup_in_text_processing"):
@@ -60,24 +63,25 @@ def process_text():
st.balloons()
def audio_processing_section():
- st.header("Dubbing (beta)")
+ st.header("Dubbing")
with st.container(border=True):
st.markdown("""
This stage includes the following steps:
- 1. Generate audio tasks
- 2. Generate audio
- 3. Merge audio into the video
+ 1. Generate audio tasks and chunks
+ 2. Extract reference audio
+ 3. Generate and merge audio files
+ 4. Merge final audio into video
""", unsafe_allow_html=True)
- if not os.path.exists("output/output_video_with_audio.mp4"):
+ if not os.path.exists(DUB_VIDEO):
if st.button("Start Audio Processing", key="audio_processing_button"):
process_audio()
st.rerun()
else:
st.success("Audio processing is complete! You can check the audio files in the `output` folder.")
if load_key("resolution") != "0x0":
- st.video("output/output_video_with_audio.mp4")
+ st.video(DUB_VIDEO)
if st.button("Delete dubbing files", key="delete_dubbing_files"):
delete_dubbing_files()
st.rerun()
@@ -87,13 +91,16 @@ def audio_processing_section():
def process_audio():
with st.spinner("Generate audio tasks"):
- step8_gen_audio_task.gen_audio_task_main()
+ step8_1_gen_audio_task.gen_audio_task_main()
+ step8_2_gen_dub_chunks.gen_dub_chunks()
with st.spinner("Extract refer audio"):
step9_extract_refer_audio.extract_refer_audio_main()
- with st.spinner("Generate audio"):
- step10_gen_audio.process_sovits_tasks()
- with st.spinner("Merge audio into the video"):
- step11_merge_audio_to_vid.merge_main()
+ with st.spinner("Generate all audio"):
+ step10_gen_audio.gen_audio()
+ with st.spinner("Merge full audio"):
+ step11_merge_full_audio.merge_full_audio()
+ with st.spinner("Merge dubbing to the video"):
+ step12_merge_dub_to_vid.merge_video_audio()
st.success("Audio processing complete! 🎇")
st.balloons()
diff --git a/st_components/icon.png b/st_components/icon.png
deleted file mode 100644
index c26080fd..00000000
Binary files a/st_components/icon.png and /dev/null differ
diff --git a/st_components/imports_and_utils.py b/st_components/imports_and_utils.py
index 76985fc5..9a6fc4fa 100644
--- a/st_components/imports_and_utils.py
+++ b/st_components/imports_and_utils.py
@@ -1,8 +1,31 @@
import os, sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from core import step1_ytdlp, step2_whisperX, step3_1_spacy_split, step3_2_splitbymeaning, step9_extract_refer_audio
-from core import step4_1_summarize, step4_2_translate_all, step5_splitforsub, step6_generate_final_timeline
-from core import step7_merge_sub_to_vid, step8_gen_audio_task, step10_gen_audio, step11_merge_audio_to_vid
+from core import (
+ # Download & Transcribe 📥
+ step11_merge_full_audio,
+ step1_ytdlp,
+ step2_whisperX,
+
+ # Text Processing & Analysis 📝
+ step3_1_spacy_split,
+ step3_2_splitbymeaning,
+ step4_1_summarize,
+ step4_2_translate_all,
+ step5_splitforsub,
+
+ # Subtitle Timeline & Merging 🎬
+ step6_generate_final_timeline,
+ step7_merge_sub_to_vid,
+
+ # Audio Generation & Processing 🎵
+ step8_1_gen_audio_task,
+ step8_2_gen_dub_chunks,
+ step9_extract_refer_audio,
+ step10_gen_audio,
+
+ # Final Video Composition 🎥
+ step12_merge_dub_to_vid
+)
from core.onekeycleanup import cleanup
from core.delete_retry_dubbing import delete_dubbing_files
from core.ask_gpt import ask_gpt
diff --git a/st_components/sidebar_setting.py b/st_components/sidebar_setting.py
index db371633..b0d88ec5 100644
--- a/st_components/sidebar_setting.py
+++ b/st_components/sidebar_setting.py
@@ -4,56 +4,54 @@
import streamlit as st
from core.config_utils import update_key, load_key
-def config_text_input(label, key, help=None):
- """Generic config text input handler"""
- value = st.text_input(label, value=load_key(key), help=help)
- if value != load_key(key):
- update_key(key, value)
- return value
+def config_input(label, key, help=None):
+    """Render a text input bound to a config key and persist edits.
+
+    Args:
+        label: Label shown for the text box.
+        key: Config key passed to ``load_key`` / ``update_key``.
+        help: Optional tooltip forwarded to ``st.text_input``.
+
+    Returns:
+        The text currently held by the input box.
+    """
+    # Read the stored value once: it is both the widget default and the
+    # baseline for change detection, so a second load_key call is redundant.
+    stored = load_key(key)
+    val = st.text_input(label, value=stored, help=help)
+    if val != stored:
+        update_key(key, val)
+    return val
def page_setting():
with st.expander("LLM Configuration", expanded=True):
- config_text_input("API_KEY", "api.key")
- config_text_input("BASE_URL", "api.base_url", help="Base URL for API requests")
+ config_input("API_KEY", "api.key")
+ config_input("BASE_URL", "api.base_url", help="Base URL for API requests")
- col1, col2 = st.columns([4, 1])
- with col1:
- config_text_input("MODEL", "api.model")
- with col2:
+ c1, c2 = st.columns([4, 1])
+ with c1:
+ config_input("MODEL", "api.model")
+ with c2:
if st.button("📡", key="api"):
- if valid_llm_api():
- st.toast("API Key is valid", icon="✅")
- else:
- st.toast("API Key is invalid", icon="❌")
+                # Probe the API once and reuse the result: calling check_api()
+                # per argument sent two requests per click and could pair a
+                # "valid" message with a failure icon (or vice versa).
+                api_ok = check_api()
+                st.toast("API Key is valid" if api_ok else "API Key is invalid",
+                         icon="✅" if api_ok else "❌")
with st.expander("Transcription and Subtitle Settings", expanded=True):
- col1, col2 = st.columns(2)
- with col1:
- whisper_language_options_dict = {
- "🇺🇸 English": "en",
- "🇨🇳 简体中文": "zh",
- "🇪🇸 Español": "es",
- "🇷🇺 Русский": "ru",
- "🇫🇷 Français": "fr",
- "🇩🇪 Deutsch": "de",
- "🇮🇹 Italiano": "it",
- "🇯🇵 日本語": "ja"
+ c1, c2 = st.columns(2)
+ with c1:
+ langs = {
+ "🇺🇸 English": "en",
+ "🇨🇳 简体中文": "zh",
+ "🇪🇸 Español": "es",
+ "🇷🇺 Русский": "ru",
+ "🇫🇷 Français": "fr",
+ "🇩🇪 Deutsch": "de",
+ "🇮🇹 Italiano": "it",
+ "🇯🇵 日本語": "ja"
}
- selected_whisper_language = st.selectbox(
+ lang = st.selectbox(
"Recognition Language:",
- options=list(whisper_language_options_dict.keys()),
- index=list(whisper_language_options_dict.values()).index(load_key("whisper.language"))
+ options=list(langs.keys()),
+ index=list(langs.values()).index(load_key("whisper.language"))
)
- if whisper_language_options_dict[selected_whisper_language] != load_key("whisper.language"):
- update_key("whisper.language", whisper_language_options_dict[selected_whisper_language])
+ if langs[lang] != load_key("whisper.language"):
+ update_key("whisper.language", langs[lang])
- with col2:
+ with c2:
target_language = st.text_input("Target Language", value=load_key("target_language"))
if target_language != load_key("target_language"):
update_key("target_language", target_language)
- col1, col2 = st.columns(2)
- with col1:
+ c1, c2 = st.columns(2)
+ with c1:
burn_subtitles = st.toggle("Burn Subtitles", value=load_key("resolution") != "0x0")
resolution_options = {
@@ -61,7 +59,7 @@ def page_setting():
"360p": "640x360"
}
- with col2:
+ with c2:
if burn_subtitles:
selected_resolution = st.selectbox(
"Video Resolution",
@@ -75,31 +73,52 @@ def page_setting():
if resolution != load_key("resolution"):
update_key("resolution", resolution)
- with st.expander("Dubbing Settings", expanded=False):
- tts_methods = ["openai_tts", "azure_tts", "gpt_sovits", "fish_tts"]
+ with st.expander("Dubbing Settings", expanded=True):
+ tts_methods = ["sf_fish_tts", "openai_tts", "azure_tts", "gpt_sovits", "fish_tts"]
selected_tts_method = st.selectbox("TTS Method", options=tts_methods, index=tts_methods.index(load_key("tts_method")))
if selected_tts_method != load_key("tts_method"):
update_key("tts_method", selected_tts_method)
- if selected_tts_method == "openai_tts":
- config_text_input("OpenAI Voice", "openai_tts.voice")
- config_text_input("OpenAI TTS API Key", "openai_tts.api_key")
- config_text_input("OpenAI TTS API Base URL", "openai_tts.base_url")
+ if selected_tts_method == "sf_fish_tts":
+ config_input("SiliconFlow API Key", "sf_fish_tts.api_key")
+
+ # Add mode selection dropdown
+ mode_options = {
+ "preset": "Preset",
+ "custom": "Refer_stable",
+ "dynamic": "Refer_dynamic"
+ }
+ selected_mode = st.selectbox(
+ "Mode Selection",
+ options=list(mode_options.keys()),
+ format_func=lambda x: mode_options[x],
+ index=list(mode_options.keys()).index(load_key("sf_fish_tts.mode")) if load_key("sf_fish_tts.mode") in mode_options.keys() else 0
+ )
+ if selected_mode != load_key("sf_fish_tts.mode"):
+ update_key("sf_fish_tts.mode", selected_mode)
+
+ if selected_mode == "preset":
+ config_input("Voice", "sf_fish_tts.voice")
+
+ elif selected_tts_method == "openai_tts":
+ config_input("OpenAI Voice", "openai_tts.voice")
+ config_input("OpenAI TTS API Key", "openai_tts.api_key")
+ config_input("OpenAI TTS API Base URL", "openai_tts.base_url")
elif selected_tts_method == "fish_tts":
- config_text_input("Fish TTS API Key", "fish_tts.api_key")
+ config_input("Fish TTS API Key", "fish_tts.api_key")
fish_tts_character = st.selectbox("Fish TTS Character", options=list(load_key("fish_tts.character_id_dict").keys()), index=list(load_key("fish_tts.character_id_dict").keys()).index(load_key("fish_tts.character")))
if fish_tts_character != load_key("fish_tts.character"):
update_key("fish_tts.character", fish_tts_character)
elif selected_tts_method == "azure_tts":
- config_text_input("Azure Key", "azure_tts.key")
- config_text_input("Azure Region", "azure_tts.region")
- config_text_input("Azure Voice", "azure_tts.voice")
+ config_input("Azure Key", "azure_tts.key")
+ config_input("Azure Region", "azure_tts.region")
+ config_input("Azure Voice", "azure_tts.voice")
elif selected_tts_method == "gpt_sovits":
st.info("配置GPT_SoVITS,请参考Github主页")
- config_text_input("SoVITS Character", "gpt_sovits.character")
+ config_input("SoVITS Character", "gpt_sovits.character")
refer_mode_options = {1: "模式1:仅用提供的参考音频", 2: "模式2:仅用视频第1条语音做参考", 3: "模式3:使用视频每一条语音做参考"}
selected_refer_mode = st.selectbox(
@@ -112,9 +131,10 @@ def page_setting():
if selected_refer_mode != load_key("gpt_sovits.refer_mode"):
update_key("gpt_sovits.refer_mode", selected_refer_mode)
-def valid_llm_api():
+def check_api():
try:
- response = ask_gpt("This is a test, response 'message':'success' in json format.", response_json=True, log_title='None')
- return response.get('message') == 'success'
+ resp = ask_gpt("This is a test, response 'message':'success' in json format.",
+ response_json=True, log_title='None')
+ return resp.get('message') == 'success'
except Exception:
return False
\ No newline at end of file