Spaces:
Runtime error
Runtime error
File size: 4,090 Bytes
7cedd07 e75e1c6 5223b6a fa13218 97e8796 5223b6a 7cedd07 fa13218 da8f7f9 7cedd07 f71a8b3 5223b6a 1ed541f da8f7f9 3ec929e 769c7b4 c79816c aa3c3a8 d9d25fe fa13218 97e8796 da8f7f9 fa13218 97e8796 fa13218 97e8796 fa13218 97e8796 3402d0b 568d66f da8f7f9 ce79c62 97e8796 ce79c62 97e8796 fa13218 97e8796 93d986f fa13218 3402d0b fa13218 3402d0b 5223b6a 3402d0b 97e8796 5223b6a 568d66f 3402d0b 5223b6a 97e8796 3402d0b 5223b6a 97e8796 3402d0b 97e8796 3402d0b da8f7f9 3402d0b e682a2e 3402d0b e682a2e 3402d0b e682a2e 97e8796 3402d0b 9dd7b34 3402d0b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
import subprocess
import os
import sys
import shutil
from pathlib import Path
import argparse
import gradio as gr
# Clean up first: only the temporary/working folders listed here are removed.
folders_to_delete = ["./output", "./__pycache__", "./.cache", "./temp"]
# filter() is lazy, so each existence check still happens right before the
# corresponding delete, exactly as in a check-then-delete loop.
for folder in filter(os.path.exists, folders_to_delete):
    print(f"๐๏ธ ุญุฐู {folder}")
    shutil.rmtree(folder)
# Print the current memory usage.
# This report is purely informational, so a missing psutil package must not
# prevent the app from starting: the import is guarded and the report skipped.
try:
    import psutil
except ImportError:
    mem = None
    print("psutil is not installed; skipping the RAM usage report.")
else:
    mem = psutil.virtual_memory()
    # Byte counts are converted to decimal gigabytes (1e9 bytes).
    print(f"๐ RAM ุงููุณุชุฎุฏูุฉ: {mem.used / 1e9:.2f} GB / {mem.total / 1e9:.2f} GB")
# Download the models when the FantasyTalking checkpoint is not present yet.
if not os.path.exists("./models/fantasytalking_model.ckpt"):
    print("๐ ๏ธ ุฌุงุฑู ุชุญููู ุงูููุงุฐุฌ ุนุจุฑ download_models.py ...")
    # sys.executable runs the script with the same interpreter (and therefore
    # the same installed packages) as this app; a bare "python" may resolve to
    # a different interpreter or not exist on PATH at all.
    # check=True raises CalledProcessError on a non-zero exit status, so a
    # failed download stops here instead of surfacing later as a confusing
    # missing-file error while loading the models.
    subprocess.run([sys.executable, "download_models.py"], check=True)
# Set up the import paths: append the absolute path of the current working
# directory to sys.path.
_project_root = os.path.abspath(".")
sys.path.append(_project_root)
# Import the pipeline components
from STT.sst import speech_to_text
from LLM.llm import generate_reply
from TTS_X.tts import generate_voice
from FantasyTalking.infer import load_models, main
# Model constants: the argument template handed to the FantasyTalking loader.
# image_path, audio_path and prompt are empty placeholders here.
args_template = argparse.Namespace(
    **{
        # Model locations on disk.
        "fantasytalking_model_path": "./models/fantasytalking_model.ckpt",
        "wav2vec_model_dir": "./models/wav2vec2-base-960h",
        "wan_model_dir": "./models/Wan2.1-I2V-14B-720P",
        # Per-request inputs (placeholders).
        "image_path": "",
        "audio_path": "",
        "prompt": "",
        "output_dir": "./output",
        # Generation settings.
        "image_size": 512,
        "audio_scale": 1.0,
        "prompt_cfg_scale": 5.0,
        "audio_cfg_scale": 5.0,
        "max_num_frames": 81,
        "inference_steps": 20,
        "fps": 23,
        "num_persistent_param_in_dit": None,
        "seed": 1111,
    }
)
# Load the models once at import time; the four returned objects are kept as
# module-level globals and passed to main() by generate_video().
print("๐ ุฌุงุฑู ุชุญููู FantasyTalking ู Wav2Vec...")
(
    pipe,
    fantasytalking,
    wav2vec_processor,
    wav2vec,
) = load_models(args_template)
print("โ ุชู ุงูุชุญููู!")
# Video generation
def generate_video(image_path, audio_path, prompt, output_dir="./output"):
    """Run FantasyTalking inference for one image/audio/prompt request.

    Builds a fresh ``argparse.Namespace`` from ``args_template`` with the four
    request-specific fields replaced, so the module-level template itself is
    never modified.

    Args:
        image_path: Value stored in the ``image_path`` argument.
        audio_path: Value stored in the ``audio_path`` argument.
        prompt: Value stored in the ``prompt`` argument.
        output_dir: Value stored in the ``output_dir`` argument.

    Returns:
        Whatever ``main`` returns (presumably the path of the rendered video;
        confirm against ``FantasyTalking.infer.main``).
    """
    request_fields = {
        "image_path": image_path,
        "audio_path": audio_path,
        "prompt": prompt,
        "output_dir": output_dir,
    }
    # Later keys win in the merge, so the request fields override the
    # template's values while every other setting is carried over unchanged.
    args = argparse.Namespace(**{**vars(args_template), **request_fields})
    return main(args, pipe, fantasytalking, wav2vec_processor, wav2vec)
# Full pipeline
def full_pipeline(user_audio, user_image):
    """Run the whole chain: speech -> text -> reply -> voice -> video.

    Args:
        user_audio: File path of the user's audio (the Gradio Audio input).
        user_image: File path of the speaker image (the Gradio Image input).

    Returns:
        A 4-tuple ``(user_text, reply, reply_audio_path, video_path)`` in the
        order expected by the Gradio output components.

    Raises:
        gr.Error: If the audio or the image is missing, so the user gets a
            readable message in the UI instead of a failure deep inside one
            of the models.
    """
    # An empty Gradio input component is delivered as None; reject it before
    # starting any of the expensive model steps.
    if not user_audio:
        raise gr.Error("Please provide an audio recording before running.")
    if not user_image:
        raise gr.Error("Please provide an image of the speaker before running.")

    print("๐ค ุชุญููู ุงูุตูุช ุฅูู ูุต...")
    user_text = speech_to_text(user_audio)

    print("๐ฌ ุชูููุฏ ุงูุฑุฏ...")
    reply = generate_reply(user_text)

    print("๐ ุชุญููู ุงูุฑุฏ ุฅูู ุตูุช...")
    reply_audio_path = generate_voice(reply)

    print("๐ฝ๏ธ ุชูููุฏ ุงูููุฏูู...")
    # Make sure the output folder exists before the video step runs.
    Path("./output").mkdir(parents=True, exist_ok=True)
    # The generated reply text doubles as the prompt for the video model.
    video_path = generate_video(
        image_path=user_image,
        audio_path=reply_audio_path,
        prompt=reply,
    )
    return user_text, reply, reply_audio_path, video_path
# Gradio interface
with gr.Blocks(title="๐ง ุตูุชู ูุญุฑู ุตูุฑุฉ!") as demo:
    gr.Markdown("## ๐คโก๏ธ๐ฌโก๏ธ๐โก๏ธ๐ฝ๏ธ ูู ุตูุชู ุฅูู ููุฏูู ูุชููู!")

    with gr.Row():
        # First column: the two user inputs and the trigger button.
        with gr.Column():
            audio_input = gr.Audio(label="๐๏ธ ุงุฑูุน ุตูุชู", type="filepath")
            image_input = gr.Image(label="๐ผ๏ธ ุตูุฑุฉ ุงููุชุญุฏุซ", type="filepath")
            btn = gr.Button("๐ฌ ุดุบู")

        # Second column: one output component per value returned by
        # full_pipeline, in the same order.
        with gr.Column():
            user_text = gr.Textbox(label="๐ ุงููุต ุงููุณููุน")
            reply_text = gr.Textbox(label="๐ค ุฑุฏ ุงููุณุงุนุฏ")
            reply_audio = gr.Audio(label="๐ ุงูุฑุฏ ุงูููุทูู")
            video_output = gr.Video(label="๐ฝ๏ธ ุงูููุฏูู ุงููุงุชุฌ")

    # Wire the button to the pipeline.
    pipeline_inputs = [audio_input, image_input]
    pipeline_outputs = [user_text, reply_text, reply_audio, video_output]
    btn.click(fn=full_pipeline, inputs=pipeline_inputs, outputs=pipeline_outputs)

# Start the app.
demo.launch(inbrowser=True, share=True)
|