VanguardAI committed on
Commit
1061b7a
·
verified ·
1 Parent(s): 724aed2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -5,6 +5,7 @@ import numpy as np
5
  from groq import Groq
6
  import spaces
7
  from transformers import AutoModel, AutoTokenizer
 
8
  from parler_tts import ParlerTTSForConditionalGeneration
9
  import soundfile as sf
10
  from llama_index.core.agent import ReActAgent
@@ -15,20 +16,19 @@ from tavily import TavilyClient
15
  import requests
16
  from huggingface_hub import hf_hub_download
17
  from safetensors.torch import load_file
18
- from diffusers import StableDiffusion3Pipeline
19
 
20
  # Initialize models and clients
21
  MODEL = 'llama3-groq-70b-8192-tool-use-preview'
22
  client = Groq(model=MODEL, api_key=os.environ.get("GROQ_API_KEY"))
23
 
24
  vqa_model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True,
25
- device_map="auto", torch_dtype=torch.bfloat16)
26
  tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
27
 
28
  tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1")
29
  tts_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-large-v1")
30
 
31
- # Updated Image Generation Model
32
  pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
33
  pipe = pipe.to("cuda")
34
 
@@ -75,7 +75,7 @@ def image_generation(query):
75
  image = pipe(
76
  query,
77
  negative_prompt="",
78
- num_inference_steps=28,
79
  guidance_scale=7.0,
80
  ).images[0]
81
  image.save("output.jpg")
@@ -111,12 +111,11 @@ def handle_input(user_prompt, image=None, audio=None, websearch=False):
111
  messages = [{"role": "user", "content": [image, user_prompt]}]
112
  response = vqa_model.chat(image=None, msgs=messages, tokenizer=tokenizer)
113
  else:
114
- # Modify this part to check if a tool is required or if a direct answer suffices
115
  response = agent.chat(user_prompt)
116
 
117
  # Extract the content from AgentChatResponse to return as a string
118
  if isinstance(response, AgentChatResponse):
119
- response = response.response_text
120
 
121
  return response
122
 
@@ -189,4 +188,4 @@ def main_interface(user_prompt, image=None, audio=None, voice_only=False, websea
189
 
190
  # Launch the UI
191
  demo = create_ui()
192
- demo.launch()
 
5
  from groq import Groq
6
  import spaces
7
  from transformers import AutoModel, AutoTokenizer
8
+ from diffusers import StableDiffusion3Pipeline
9
  from parler_tts import ParlerTTSForConditionalGeneration
10
  import soundfile as sf
11
  from llama_index.core.agent import ReActAgent
 
16
  import requests
17
  from huggingface_hub import hf_hub_download
18
  from safetensors.torch import load_file
 
19
 
20
  # Initialize models and clients
21
  MODEL = 'llama3-groq-70b-8192-tool-use-preview'
22
  client = Groq(model=MODEL, api_key=os.environ.get("GROQ_API_KEY"))
23
 
24
  vqa_model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True,
25
+ device_map="auto", torch_dtype=torch.bfloat16)
26
  tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
27
 
28
  tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1")
29
  tts_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-large-v1")
30
 
31
+ # Updated Image generation model
32
  pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
33
  pipe = pipe.to("cuda")
34
 
 
75
  image = pipe(
76
  query,
77
  negative_prompt="",
78
+ num_inference_steps=15,
79
  guidance_scale=7.0,
80
  ).images[0]
81
  image.save("output.jpg")
 
111
  messages = [{"role": "user", "content": [image, user_prompt]}]
112
  response = vqa_model.chat(image=None, msgs=messages, tokenizer=tokenizer)
113
  else:
 
114
  response = agent.chat(user_prompt)
115
 
116
  # Extract the content from AgentChatResponse to return as a string
117
  if isinstance(response, AgentChatResponse):
118
+ response = response.final_response # Use 'final_response' to access the text response
119
 
120
  return response
121
 
 
188
 
189
  # Launch the UI
190
  demo = create_ui()
191
+ demo.launch()