Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -26,11 +26,11 @@ text_model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=
|
|
26 |
device_map="auto", torch_dtype=torch.bfloat16)
|
27 |
tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
|
28 |
|
29 |
-
tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1")
|
30 |
tts_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-large-v1")
|
31 |
|
32 |
-
image_model = UNet2DConditionModel.from_config("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet")
|
33 |
-
image_pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", unet=image_model, torch_dtype=torch.float16, variant="fp16")
|
34 |
image_pipe.scheduler = EulerDiscreteScheduler.from_config(image_pipe.scheduler.config, timestep_spacing="trailing")
|
35 |
|
36 |
# Initialize voice-only mode
|
@@ -177,6 +177,9 @@ def initialize_tools():
|
|
177 |
# Gradio Interface
|
178 |
def main_interface(user_prompt, image=None, video=None, audio=None, doc=None, voice_only=False):
|
179 |
text_model = text_model.to(device='cuda', dtype=torch.bfloat16)
|
|
|
|
|
|
|
180 |
response = handle_input(user_prompt, image=image, video=video, audio=audio, doc=doc)
|
181 |
if voice_only:
|
182 |
audio_file = play_voice_output(response)
|
|
|
26 |
device_map="auto", torch_dtype=torch.bfloat16)
|
27 |
tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
|
28 |
|
29 |
+
tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1")
|
30 |
tts_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-large-v1")
|
31 |
|
32 |
+
image_model = UNet2DConditionModel.from_config("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet")
|
33 |
+
image_pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", unet=image_model, torch_dtype=torch.float16, variant="fp16")
|
34 |
image_pipe.scheduler = EulerDiscreteScheduler.from_config(image_pipe.scheduler.config, timestep_spacing="trailing")
|
35 |
|
36 |
# Initialize voice-only mode
|
|
|
177 |
# Gradio Interface
|
178 |
def main_interface(user_prompt, image=None, video=None, audio=None, doc=None, voice_only=False):
|
179 |
text_model = text_model.to(device='cuda', dtype=torch.bfloat16)
|
180 |
+
tts_model.to("cuda")
|
181 |
+
image_model.to("cuda", torch.float16)
|
182 |
+
image_pipe.to("cuda")
|
183 |
response = handle_input(user_prompt, image=image, video=video, audio=audio, doc=doc)
|
184 |
if voice_only:
|
185 |
audio_file = play_voice_output(response)
|