Update app.py
Browse files
app.py
CHANGED
|
@@ -23,7 +23,7 @@ MODEL = 'llama3-groq-70b-8192-tool-use-preview'
|
|
| 23 |
|
| 24 |
# Load MiniCPM-V-2 (no quantization argument is visible in this call)
|
| 25 |
text_model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True,
|
| 26 |
-
device_map="auto",
|
| 27 |
tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
|
| 28 |
|
| 29 |
tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1").to('cuda')
|
|
@@ -176,6 +176,7 @@ def initialize_tools():
|
|
| 176 |
@spaces.GPU()
|
| 177 |
# Gradio Interface
|
| 178 |
def main_interface(user_prompt, image=None, video=None, audio=None, doc=None, voice_only=False):
|
|
|
|
| 179 |
response = handle_input(user_prompt, image=image, video=video, audio=audio, doc=doc)
|
| 180 |
if voice_only:
|
| 181 |
audio_file = play_voice_output(response)
|
|
|
|
| 23 |
|
| 24 |
# Load MiniCPM-V-2 in bfloat16 (no 4-bit quantization is configured)
|
| 25 |
text_model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True,
|
| 26 |
+
device_map="auto", torch_dtype=torch.bfloat16)
|
| 27 |
tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
|
| 28 |
|
| 29 |
tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1").to('cuda')
|
|
|
|
| 176 |
@spaces.GPU()
|
| 177 |
# Gradio Interface
|
| 178 |
def main_interface(user_prompt, image=None, video=None, audio=None, doc=None, voice_only=False):
|
| 179 |
+
text_model = text_model.to(device='cuda', dtype=torch.bfloat16)
|
| 180 |
response = handle_input(user_prompt, image=image, video=video, audio=audio, doc=doc)
|
| 181 |
if voice_only:
|
| 182 |
audio_file = play_voice_output(response)
|