# Spaces status (page header captured together with this file): Runtime error
import argparse
from pathlib import Path

import gradio as gr

from STT.sst import speech_to_text
from LLM.llm import generate_reply
from TTS_X.tts import generate_voice
from FantasyTalking.infer import load_models, main
# Model-loading constants.
# This namespace is passed to load_models() at import time and is also the
# base from which generate_video() derives its per-request arguments. The
# empty image_path / audio_path / prompt values are placeholders that are
# replaced for every request.
args_template = argparse.Namespace(
    wan_model_dir="./models/Wan2.1-I2V-14B-720P",
    fantasytalking_model_path="./models/fantasytalking_model.ckpt",
    wav2vec_model_dir="./models/wav2vec2-base-960h",
    image_path="",
    audio_path="",
    prompt="",
    output_dir="./output",
    image_size=512,
    audio_scale=1.0,
    prompt_cfg_scale=5.0,
    audio_cfg_scale=5.0,
    max_num_frames=81,
    inference_steps=20,
    fps=23,
    num_persistent_param_in_dit=None,
    seed=1111,
)
# Load the models only once, at import time; every request handled by
# generate_video() reuses these four objects.
pipe, fantasytalking, wav2vec_processor, wav2vec = load_models(args_template)
def generate_video(image_path, audio_path, prompt, output_dir="./output"):
    """Run one FantasyTalking inference request with the preloaded models.

    A per-request namespace is derived from ``args_template`` with the four
    request-specific fields replaced, then passed to ``main`` together with
    the models loaded at import time.

    Args:
        image_path: Value stored as ``args.image_path`` (the input image).
        audio_path: Value stored as ``args.audio_path`` (the driving audio).
        prompt: Value stored as ``args.prompt``.
        output_dir: Value stored as ``args.output_dir``.

    Returns:
        Whatever ``main`` returns. The caller routes it into a ``gr.Video``
        output, so it is presumably the generated video's path -- confirm
        against ``FantasyTalking.infer.main``.
    """
    # args_template already defines these four keys. Passing them again as
    # explicit keywords next to **vars(args_template) raises
    # "TypeError: ... got multiple values for keyword argument", so the
    # overrides are merged into a single dict first (later keys win).
    request_fields = {
        **vars(args_template),
        "image_path": image_path,
        "audio_path": audio_path,
        "prompt": prompt,
        "output_dir": output_dir,
    }
    args = argparse.Namespace(**request_fields)
    return main(args, pipe, fantasytalking, wav2vec_processor, wav2vec)
def full_pipeline(user_audio, user_image):
    """Turn a spoken request plus a portrait into a talking-video reply.

    Runs the four stages in order: speech-to-text, LLM reply generation,
    text-to-speech, and video generation from the image and the reply audio.

    Args:
        user_audio: File path of the user's audio (the Gradio audio input is
            configured with ``type="filepath"``).
        user_image: File path of the speaker image (the Gradio image input is
            configured with ``type="filepath"``).

    Returns:
        A 4-tuple ``(user_text, reply, reply_audio_path, video_path)`` in the
        order expected by the click handler's outputs.

    Raises:
        gr.Error: If either input is missing, so the UI shows a readable
            message instead of a failure from deep inside a model call.
    """
    # Gradio passes None for a component the user left empty.
    if not user_audio:
        raise gr.Error("Please provide an audio recording first.")
    if not user_image:
        raise gr.Error("Please provide a speaker image first.")

    # 1. Convert the speech to text.
    user_text = speech_to_text(user_audio)

    # 2. Generate the reply from the LLM.
    reply = generate_reply(user_text)

    # 3. Convert the reply to speech.
    reply_audio_path = generate_voice(reply)

    # 4. Generate the video from the image and the reply audio.
    Path("./output").mkdir(parents=True, exist_ok=True)
    video_path = generate_video(
        image_path=user_image,
        audio_path=reply_audio_path,
        prompt=reply,
    )

    return user_text, reply, reply_audio_path, video_path
# Gradio interface.
# NOTE(review): the Arabic labels and emoji in the string literals below
# appear mis-encoded (mojibake) in this copy of the file. They are reproduced
# unchanged here; restore them from the UTF-8 original rather than retyping.
with gr.Blocks(title="๐ง ุตูุชู ูุญุฑู ุตูุฑุฉ!") as demo:
    gr.Markdown("## ๐คโก๏ธ๐ฌโก๏ธ๐โก๏ธ๐ฝ๏ธ ู ู ุตูุชู ุฅูู ููุฏูู ู ุชููู !")

    with gr.Row():
        # Left column: user inputs and the run button.
        with gr.Column():
            audio_input = gr.Audio(label="๐๏ธ ุงุฑูุน ุตูุชู", type="filepath")
            image_input = gr.Image(label="๐ผ๏ธ ุตูุฑุฉ ุงูู ุชุญุฏุซ", type="filepath")
            btn = gr.Button("๐ฌ ุดุบู")

        # Right column: one output component per value returned by
        # full_pipeline, in the same order.
        with gr.Column():
            user_text = gr.Textbox(label="๐ ุงููุต ุงูู ุณู ูุน")
            reply_text = gr.Textbox(label="๐ค ุฑุฏ ุงูู ุณุงุนุฏ")
            reply_audio = gr.Audio(label="๐ ุงูุฑุฏ ุงูู ูุทูู")
            video_output = gr.Video(label="๐ฝ๏ธ ุงูููุฏูู ุงููุงุชุฌ")

    # Event wiring must happen inside the Blocks context.
    btn.click(
        fn=full_pipeline,
        inputs=[audio_input, image_input],
        outputs=[user_text, reply_text, reply_audio, video_output],
    )

# NOTE(review): inbrowser/share are mainly useful for local runs; a hosted
# environment may ignore or warn about them -- confirm for the deployment.
demo.launch(inbrowser=True, share=True)