Spaces:
Runtime error
Runtime error
File size: 2,800 Bytes
d9d25fe 97e8796 3ec929e 769c7b4 c79816c aa3c3a8 d9d25fe 97e8796 3402d0b 568d66f 97e8796 3402d0b 97e8796 3402d0b 97e8796 3402d0b 97e8796 3402d0b 97e8796 3402d0b 97e8796 568d66f 3402d0b 97e8796 3402d0b 97e8796 3402d0b 97e8796 3402d0b d9d25fe 97e8796 3402d0b e682a2e 3402d0b e682a2e 3402d0b e682a2e 97e8796 3402d0b 9dd7b34 3402d0b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import gradio as gr
from pathlib import Path
import argparse
from STT.sst import speech_to_text
from LLM.llm import generate_reply
from TTS_X.tts import generate_voice
from FantasyTalking.infer import load_models, main
# Model-loading constants.
# Default inference settings. image_path, audio_path and prompt are left
# empty here; generate_video supplies the per-request values.
args_template = argparse.Namespace(
    **{
        "wan_model_dir": "./models/Wan2.1-I2V-14B-720P",
        "fantasytalking_model_path": "./models/fantasytalking_model.ckpt",
        "wav2vec_model_dir": "./models/wav2vec2-base-960h",
        "image_path": "",
        "audio_path": "",
        "prompt": "",
        "output_dir": "./output",
        "image_size": 512,
        "audio_scale": 1.0,
        "prompt_cfg_scale": 5.0,
        "audio_cfg_scale": 5.0,
        "max_num_frames": 81,
        "inference_steps": 20,
        "fps": 23,
        "num_persistent_param_in_dit": None,
        "seed": 1111,
    }
)

# Load the models only once, at import time, so every request reuses the
# same objects instead of reloading them.
(
    pipe,
    fantasytalking,
    wav2vec_processor,
    wav2vec,
) = load_models(args_template)
def generate_video(image_path, audio_path, prompt, output_dir="./output"):
    """Run the video model for one image/audio/prompt request.

    Builds a per-request copy of ``args_template`` with the four request
    fields replaced, then hands it to ``main`` together with the models
    loaded at module level.

    Args:
        image_path: Path of the source image.
        audio_path: Path of the driving audio file.
        prompt: Text prompt forwarded to the model.
        output_dir: Directory passed to the model as ``output_dir``.

    Returns:
        Whatever ``main`` returns. ``full_pipeline`` feeds this value to the
        ``gr.Video`` output, so it is presumably the path of the generated
        video file (confirm against ``FantasyTalking.infer.main``).
    """
    # args_template already defines image_path, audio_path, prompt and
    # output_dir. Passing them again as explicit keywords next to
    # **vars(args_template) raises "TypeError: got multiple values for
    # keyword argument", so merge into a single dict first. Copying also
    # keeps the shared template unmodified between requests.
    settings = dict(vars(args_template))
    settings.update(
        image_path=image_path,
        audio_path=audio_path,
        prompt=prompt,
        output_dir=output_dir,
    )
    args = argparse.Namespace(**settings)
    return main(args, pipe, fantasytalking, wav2vec_processor, wav2vec)
def full_pipeline(user_audio, user_image):
    """Turn a spoken question and a portrait into a talking-video answer.

    Steps, in order: transcribe the audio, generate a text reply with the
    LLM, synthesize the reply as speech, then generate a video from the
    image and the synthesized speech.

    Args:
        user_audio: Audio input from the UI, passed to ``speech_to_text``.
        user_image: Image input from the UI, passed to ``generate_video``.

    Returns:
        A 4-tuple ``(transcript, reply, reply_audio_path, video_path)``, in
        the same order as the outputs wired to the button click handler.
    """
    # 1. Speech -> text.
    transcript = speech_to_text(user_audio)

    # 2. Text -> LLM reply.
    answer = generate_reply(transcript)

    # 3. Reply -> synthesized speech.
    answer_audio = generate_voice(answer)

    # 4. Image + speech -> video. Make sure the output directory exists
    #    before the video model is invoked.
    output_root = Path("./output")
    output_root.mkdir(parents=True, exist_ok=True)
    video_file = generate_video(user_image, answer_audio, answer)

    return transcript, answer, answer_audio, video_file
# Gradio interface.
# Layout: one row with two columns. The first column holds the audio and
# image inputs plus the run button; the second column holds the four
# outputs (transcript, reply text, reply audio, generated video).
# NOTE(review): the string literals below are user-facing text and are kept
# exactly as they appear in this view; they look mis-encoded and split
# across lines here -- confirm against the original file before editing.
with gr.Blocks(title="๐ง ุตูุชู ูุญุฑู ุตูุฑุฉ!") as demo:
gr.Markdown("## ๐คโก๏ธ๐ฌโก๏ธ๐โก๏ธ๐ฝ๏ธ ู
ู ุตูุชู ุฅูู ููุฏูู ู
ุชููู
!")
with gr.Row():
with gr.Column():
# Both inputs use type="filepath".
audio_input = gr.Audio(label="๐๏ธ ุงุฑูุน ุตูุชู", type="filepath")
image_input = gr.Image(label="๐ผ๏ธ ุตูุฑุฉ ุงูู
ุชุญุฏุซ", type="filepath")
btn = gr.Button("๐ฌ ุดุบู")
with gr.Column():
user_text = gr.Textbox(label="๐ ุงููุต ุงูู
ุณู
ูุน")
reply_text = gr.Textbox(label="๐ค ุฑุฏ ุงูู
ุณุงุนุฏ")
reply_audio = gr.Audio(label="๐ ุงูุฑุฏ ุงูู
ูุทูู")
video_output = gr.Video(label="๐ฝ๏ธ ุงูููุฏูู ุงููุงุชุฌ")
# The outputs list order must match the tuple returned by full_pipeline.
btn.click(fn=full_pipeline,
inputs=[audio_input, image_input],
outputs=[user_text, reply_text, reply_audio, video_output])
# Start the app; runs at import time because there is no __main__ guard.
demo.launch(inbrowser=True, share=True)
|