VanguardAI committed on
Commit
7f9822a
·
verified ·
1 Parent(s): 5f53de2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -26,11 +26,11 @@ text_model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=
26
  device_map="auto", torch_dtype=torch.bfloat16)
27
  tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
28
 
29
- tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1").to('cuda')
30
  tts_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-large-v1")
31
 
32
- image_model = UNet2DConditionModel.from_config("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet").to("cuda", torch.float16)
33
- image_pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", unet=image_model, torch_dtype=torch.float16, variant="fp16").to("cuda")
34
  image_pipe.scheduler = EulerDiscreteScheduler.from_config(image_pipe.scheduler.config, timestep_spacing="trailing")
35
 
36
  # Initialize voice-only mode
@@ -177,6 +177,9 @@ def initialize_tools():
177
  # Gradio Interface
178
  def main_interface(user_prompt, image=None, video=None, audio=None, doc=None, voice_only=False):
179
  text_model = text_model.to(device='cuda', dtype=torch.bfloat16)
 
 
 
180
  response = handle_input(user_prompt, image=image, video=video, audio=audio, doc=doc)
181
  if voice_only:
182
  audio_file = play_voice_output(response)
 
26
  device_map="auto", torch_dtype=torch.bfloat16)
27
  tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
28
 
29
+ tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1")
30
  tts_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-large-v1")
31
 
32
+ image_model = UNet2DConditionModel.from_config("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet")
33
+ image_pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", unet=image_model, torch_dtype=torch.float16, variant="fp16")
34
  image_pipe.scheduler = EulerDiscreteScheduler.from_config(image_pipe.scheduler.config, timestep_spacing="trailing")
35
 
36
  # Initialize voice-only mode
 
177
  # Gradio Interface
178
  def main_interface(user_prompt, image=None, video=None, audio=None, doc=None, voice_only=False):
179
  text_model = text_model.to(device='cuda', dtype=torch.bfloat16)
180
+ tts_model.to("cuda")
181
+ image_model.to("cuda", torch.float16)
182
+ image_pip.to("cuda")
183
  response = handle_input(user_prompt, image=image, video=video, audio=audio, doc=doc)
184
  if voice_only:
185
  audio_file = play_voice_output(response)