wedyanessam committed
Commit f71a8b3 · verified · 1 Parent(s): df3d223

Update app.py

Files changed (1)
  1. app.py +26 -7
app.py CHANGED
@@ -5,12 +5,12 @@ from pathlib import Path
 import argparse
 import gradio as gr
 
-# ✅ Run the model download script if the models are missing
+# ✅ Download the models if they are not already present
 if not os.path.exists("./models/fantasytalking_model.ckpt"):
     print("🛠️ Downloading the models via download_models.py ...")
     subprocess.run(["python", "download_models.py"])
 
-# ✅ Set up the project paths
+# ✅ Set up paths
 sys.path.append(os.path.abspath("."))
 
 # ✅ Import the components
@@ -19,11 +19,11 @@ from LLM.llm import generate_reply
 from TTS_X.tts import generate_voice
 from FantasyTalking.infer import load_models, main
 
-# ✅ Model constants (with the new paths)
+# ✅ Set up the model constants
 args_template = argparse.Namespace(
     fantasytalking_model_path="./models/fantasytalking_model.ckpt",
     wav2vec_model_dir="./models/wav2vec2-base-960h",
-    wan_model_dir="./models/Wan2.1-I2V-14B-720P",  # in case you go back to using Wan
+    wan_model_dir="./models/Wan2.1-I2V-14B-720P",
     image_path="",
     audio_path="",
     prompt="",
@@ -44,8 +44,14 @@ print("🚀 Loading FantasyTalking and Wav2Vec...")
 pipe, fantasytalking, wav2vec_processor, wav2vec = load_models(args_template)
 print("✅ Loading complete!")
 
-# ✅ Video generation function
+# ✅ Video generation - with debug printing
 def generate_video(image_path, audio_path, prompt, output_dir="./output"):
+    print(f"[🎨] generate_video() started")
+    print(f"[📁] image_path: {image_path}")
+    print(f"[📁] audio_path: {audio_path}")
+    print(f"[💬] prompt: {prompt}")
+    print(f"[📁] output_dir: {output_dir}")
+
     args = argparse.Namespace(
         **vars(args_template),
         image_path=image_path,
@@ -53,18 +59,28 @@ def generate_video(image_path, audio_path, prompt, output_dir="./output"):
         prompt=prompt,
         output_dir=output_dir
     )
-    return main(args, pipe, fantasytalking, wav2vec_processor, wav2vec)
 
-# ✅ Full pipeline function
+    video_path = main(args, pipe, fantasytalking, wav2vec_processor, wav2vec)
+    print(f"[✅] generate_video() finished, the video is saved at: {video_path}")
+    return video_path
+
+# ✅ Full pipeline - with debug printing
 def full_pipeline(user_audio, user_image):
+    print("[🚀] full_pipeline() started")
+    print(f"[🔊] input audio file: {user_audio}")
+    print(f"[🖼️] input image file: {user_image}")
+
     print("🎤 Converting speech to text...")
     user_text = speech_to_text(user_audio)
+    print(f"[📝] text extracted from the audio: {user_text}")
 
     print("💬 Generating the reply...")
     reply = generate_reply(user_text)
+    print(f"[🤖] generated reply: {reply}")
 
     print("🔊 Converting the reply to speech...")
     reply_audio_path = generate_voice(reply)
+    print(f"[🔊] path of the generated audio: {reply_audio_path}")
 
     print("📽️ Generating the video...")
     Path("./output").mkdir(parents=True, exist_ok=True)
@@ -74,6 +90,7 @@ def full_pipeline(user_audio, user_image):
         prompt=reply
     )
 
+    print(f"[✅] full_pipeline() finished, the final video is at: {video_path}")
     return user_text, reply, reply_audio_path, video_path
 
 # ✅ Gradio interface
@@ -97,3 +114,5 @@ with gr.Blocks(title="🧠 Your voice moves a picture!") as demo:
     outputs=[user_text, reply_text, reply_audio, video_output])
 
 demo.launch(inbrowser=True, share=True)
+
+
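
The per-call argument handling in generate_video builds a fresh argparse.Namespace from a template; the snippet below is a standalone sketch of that idiom with shortened, hypothetical field names. Note that the dict-merge form shown here tolerates overriding keys that already exist in the template, whereas splatting **vars(template) into a call together with an explicit keyword of the same name raises TypeError.

    import argparse

    # A template of defaults, mirroring the role of args_template in app.py
    # (field names here are shortened, hypothetical examples).
    template = argparse.Namespace(model_path="./models/x.ckpt", image_path="", prompt="")

    # Copy the template and override selected fields for one call.
    # Later keys in the dict literal win, and the template itself is untouched.
    call_args = argparse.Namespace(**{**vars(template),
                                      "image_path": "face.png",
                                      "prompt": "hello"})

    print(call_args.model_path)   # ./models/x.ckpt  (inherited from the template)
    print(call_args.image_path)   # face.png         (overridden for this call)
    print(template.image_path)    # ""               (template unchanged)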
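The hunks only show the tail end of the Gradio section, so for orientation here is a minimal sketch of how a gr.Blocks app of this shape is typically wired to full_pipeline. Only the title, the output component names (user_text, reply_text, reply_audio, video_output), and the demo.launch call are taken from the diff; every other component, name, and label is an assumption, not the file's actual code.

    import gradio as gr

    # Illustrative wiring only; assumes full_pipeline is defined as in app.py above.
    with gr.Blocks(title="🧠 Your voice moves a picture!") as demo:
        in_audio = gr.Audio(type="filepath", label="Your voice")      # assumed input
        in_image = gr.Image(type="filepath", label="Portrait image")  # assumed input
        run_btn = gr.Button("Generate")                               # assumed trigger

        user_text = gr.Textbox(label="Transcript")      # output names from the diff
        reply_text = gr.Textbox(label="Reply")
        reply_audio = gr.Audio(label="Reply audio")
        video_output = gr.Video(label="Generated video")

        run_btn.click(full_pipeline,
                      inputs=[in_audio, in_image],
                      outputs=[user_text, reply_text, reply_audio, video_output])

    demo.launch(inbrowser=True, share=True)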