# Residue from the Hugging Face file viewer (not part of the program):
#   wedyanessam's picture
#   Update app.py
#   93d986f verified
#   raw
#   history blame
#   4.09 kB
import subprocess
import os
import sys
import shutil
from pathlib import Path
import argparse
import gradio as gr
# Clean up first: remove only the temporary working folders.
folders_to_delete = ["./output", "./__pycache__", "./.cache", "./temp"]
for folder in folders_to_delete:
    # Only real directories can be removed with rmtree; a plain file that
    # happens to share one of these names is left alone instead of raising.
    if os.path.isdir(folder):
        print(f"๐Ÿ—‘๏ธ ุญุฐู {folder}")
        try:
            shutil.rmtree(folder)
        except OSError as exc:
            # Cleanup is housekeeping: report the failure (permissions,
            # symlinked folder, ...) and keep starting the app.
            print(f"Could not remove {folder}: {exc}")
# Print the current memory status (used / total RAM, in units of 1e9 bytes).
import psutil

mem = psutil.virtual_memory()
used_gb, total_gb = mem.used / 1e9, mem.total / 1e9
print(f"๐Ÿ” RAM ุงู„ู…ุณุชุฎุฏู…ุฉ: {used_gb:.2f} GB / {total_gb:.2f} GB")
# Download the models if the FantasyTalking checkpoint is not present yet.
if not os.path.exists("./models/fantasytalking_model.ckpt"):
    print("๐Ÿ› ๏ธ ุฌุงุฑูŠ ุชุญู…ูŠู„ ุงู„ู†ู…ุงุฐุฌ ุนุจุฑ download_models.py ...")
    # Use the interpreter that is running this app (a bare "python" may be a
    # different installation or missing from PATH), and fail fast with
    # CalledProcessError if the download script exits with an error instead
    # of continuing without the model files.
    subprocess.run([sys.executable, "download_models.py"], check=True)
# Set up paths: append the absolute current working directory to sys.path
# (presumably so the project packages imported next can be resolved).
project_root = os.path.abspath(os.curdir)
sys.path.append(project_root)
# Import the pipeline components.
from STT.sst import speech_to_text
from LLM.llm import generate_reply
from TTS_X.tts import generate_voice
from FantasyTalking.infer import load_models, main
# Model constants.
# Baseline arguments shared by every request. The three empty strings
# (image_path, audio_path, prompt) are placeholders that generate_video
# overrides per call.
_DEFAULT_ARGS = {
    # Model locations on disk.
    "fantasytalking_model_path": "./models/fantasytalking_model.ckpt",
    "wav2vec_model_dir": "./models/wav2vec2-base-960h",
    "wan_model_dir": "./models/Wan2.1-I2V-14B-720P",
    # Per-request fields.
    "image_path": "",
    "audio_path": "",
    "prompt": "",
    "output_dir": "./output",
    # Generation settings.
    "image_size": 512,
    "audio_scale": 1.0,
    "prompt_cfg_scale": 5.0,
    "audio_cfg_scale": 5.0,
    "max_num_frames": 81,
    "inference_steps": 20,
    "fps": 23,
    "num_persistent_param_in_dit": None,
    "seed": 1111,
}
args_template = argparse.Namespace(**_DEFAULT_ARGS)
# Load the models once at start-up; the four returned objects are reused by
# every generate_video call.
print("๐Ÿš€ ุฌุงุฑูŠ ุชุญู…ูŠู„ FantasyTalking ูˆ Wav2Vec...")
loaded_models = load_models(args_template)
pipe, fantasytalking, wav2vec_processor, wav2vec = loaded_models
print("โœ… ุชู… ุงู„ุชุญู…ูŠู„!")
# Generate a video.
def generate_video(image_path, audio_path, prompt, output_dir="./output"):
    """Run FantasyTalking inference for one image/audio/prompt request.

    A per-call copy of ``args_template`` is built with the request-specific
    fields overridden, so the shared template is never mutated, and is passed
    to ``main`` together with the models loaded at start-up.

    Args:
        image_path: Path of the speaker image.
        audio_path: Path of the driving audio file.
        prompt: Text prompt forwarded to the model.
        output_dir: Value passed on as ``output_dir``; the directory is
            created if it does not exist yet.

    Returns:
        Whatever ``main`` returns (``full_pipeline`` uses it as the video
        path).
    """
    # Make sure any caller-supplied output directory exists, not only the
    # default one that full_pipeline prepares.
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    overrides = {
        "image_path": image_path,
        "audio_path": audio_path,
        "prompt": prompt,
        "output_dir": output_dir,
    }
    # Template values first, request-specific overrides win.
    args = argparse.Namespace(**{**vars(args_template), **overrides})
    return main(args, pipe, fantasytalking, wav2vec_processor, wav2vec)
# Full pipeline.
def full_pipeline(user_audio, user_image):
    """Turn a voice recording and a portrait into a talking-head reply video.

    Steps: speech-to-text on the recording, reply generation from the
    transcript, text-to-speech on the reply, then video generation from the
    image and the synthesized reply audio.

    Args:
        user_audio: File path of the user's recording.
        user_image: File path of the speaker image.

    Returns:
        Tuple ``(user_text, reply, reply_audio_path, video_path)``.

    Raises:
        ValueError: If the audio or the image is missing (``None`` or empty).
    """
    # Validate both inputs before doing any work: the image is only used in
    # the last step, so without this check a missing image would be noticed
    # only after the speech-to-text, reply and text-to-speech steps had run.
    if not user_audio:
        raise ValueError("An audio recording is required.")
    if not user_image:
        raise ValueError("A speaker image is required.")

    print("๐ŸŽค ุชุญูˆูŠู„ ุงู„ุตูˆุช ุฅู„ู‰ ู†ุต...")
    user_text = speech_to_text(user_audio)

    print("๐Ÿ’ฌ ุชูˆู„ูŠุฏ ุงู„ุฑุฏ...")
    reply = generate_reply(user_text)

    print("๐Ÿ”Š ุชุญูˆูŠู„ ุงู„ุฑุฏ ุฅู„ู‰ ุตูˆุช...")
    reply_audio_path = generate_voice(reply)

    print("๐Ÿ“ฝ๏ธ ุชูˆู„ูŠุฏ ุงู„ููŠุฏูŠูˆ...")
    # The output folder is deleted at start-up, so recreate it here.
    Path("./output").mkdir(parents=True, exist_ok=True)
    video_path = generate_video(
        image_path=user_image,
        audio_path=reply_audio_path,
        prompt=reply,
    )
    return user_text, reply, reply_audio_path, video_path
# Gradio interface.
with gr.Blocks(title="๐Ÿง  ุตูˆุชูƒ ูŠุญุฑูƒ ุตูˆุฑุฉ!") as demo:
    gr.Markdown("## ๐ŸŽคโžก๏ธ๐Ÿ’ฌโžก๏ธ๐Ÿ”Šโžก๏ธ๐Ÿ“ฝ๏ธ ู…ู† ุตูˆุชูƒ ุฅู„ู‰ ููŠุฏูŠูˆ ู…ุชูƒู„ู…!")

    with gr.Row():
        # First column: the two inputs (both delivered as file paths) and the
        # run button.
        with gr.Column():
            audio_input = gr.Audio(label="๐ŸŽ™๏ธ ุงุฑูุน ุตูˆุชูƒ", type="filepath")
            image_input = gr.Image(label="๐Ÿ–ผ๏ธ ุตูˆุฑุฉ ุงู„ู…ุชุญุฏุซ", type="filepath")
            btn = gr.Button("๐ŸŽฌ ุดุบู„")

        # Second column: one widget per value returned by full_pipeline.
        with gr.Column():
            user_text = gr.Textbox(label="๐Ÿ“ ุงู„ู†ุต ุงู„ู…ุณู…ูˆุน")
            reply_text = gr.Textbox(label="๐Ÿค– ุฑุฏ ุงู„ู…ุณุงุนุฏ")
            reply_audio = gr.Audio(label="๐Ÿ”Š ุงู„ุฑุฏ ุงู„ู…ู†ุทูˆู‚")
            video_output = gr.Video(label="๐Ÿ“ฝ๏ธ ุงู„ููŠุฏูŠูˆ ุงู„ู†ุงุชุฌ")

    # The output order must match full_pipeline's return tuple.
    pipeline_inputs = [audio_input, image_input]
    pipeline_outputs = [user_text, reply_text, reply_audio, video_output]
    btn.click(fn=full_pipeline, inputs=pipeline_inputs, outputs=pipeline_outputs)

demo.launch(inbrowser=True, share=True)