Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,30 +1,31 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
-
import cv2
|
4 |
import speech_recognition as sr
|
5 |
from groq import Groq
|
6 |
import os
|
7 |
-
import time
|
8 |
-
import base64
|
9 |
-
from io import BytesIO
|
10 |
-
from gtts import gTTS
|
11 |
import tempfile
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
-
# Set device
|
14 |
-
device = torch.device("
|
15 |
-
|
16 |
|
17 |
-
#
|
18 |
-
|
19 |
-
|
|
|
|
|
20 |
|
21 |
-
# Grok API client with API key (stored as environment variable for security)
|
22 |
-
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "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") # Replace with your key or use env variable
|
23 |
try:
|
24 |
client = Groq(api_key=GROQ_API_KEY)
|
25 |
-
|
26 |
except Exception as e:
|
27 |
-
|
28 |
raise
|
29 |
|
30 |
# Functions
|
@@ -35,72 +36,30 @@ def predict_text_emotion(text):
|
|
35 |
model="llama3-70b-8192",
|
36 |
messages=[{"role": "user", "content": prompt}],
|
37 |
temperature=1,
|
38 |
-
|
39 |
top_p=1,
|
40 |
stream=False,
|
41 |
-
stop=None,
|
42 |
)
|
43 |
-
return completion.choices[0].message.content
|
44 |
except Exception as e:
|
45 |
-
|
|
|
46 |
|
47 |
def transcribe_audio(audio_path):
|
48 |
r = sr.Recognizer()
|
49 |
-
with sr.AudioFile(audio_path) as source:
|
50 |
-
audio_text = r.listen(source)
|
51 |
try:
|
|
|
|
|
52 |
text = r.recognize_google(audio_text)
|
53 |
return text
|
54 |
except sr.UnknownValueError:
|
55 |
return "I didn’t catch that—could you try again?"
|
56 |
-
except sr.RequestError:
|
|
|
57 |
return "Speech recognition unavailable—try typing instead."
|
58 |
-
|
59 |
-
def capture_webcam_frame():
|
60 |
-
cap = cv2.VideoCapture(0)
|
61 |
-
if not cap.isOpened():
|
62 |
-
return None
|
63 |
-
start_time = time.time()
|
64 |
-
while time.time() - start_time < 2:
|
65 |
-
ret, frame = cap.read()
|
66 |
-
if ret:
|
67 |
-
_, buffer = cv2.imencode('.jpg', frame)
|
68 |
-
img_base64 = base64.b64encode(buffer).decode('utf-8')
|
69 |
-
img_url = f"data:image/jpeg;base64,{img_base64}"
|
70 |
-
cap.release()
|
71 |
-
return img_url
|
72 |
-
cap.release()
|
73 |
-
return None
|
74 |
-
|
75 |
-
def detect_facial_emotion():
|
76 |
-
img_url = capture_webcam_frame()
|
77 |
-
if not img_url:
|
78 |
-
return "neutral"
|
79 |
-
try:
|
80 |
-
completion = client.chat.completions.create(
|
81 |
-
model="llama3-70b-8192",
|
82 |
-
messages=[
|
83 |
-
{
|
84 |
-
"role": "user",
|
85 |
-
"content": [
|
86 |
-
{"type": "text", "text": "Identify user's facial emotion into happy or sad or anxious or angry. Respond in one word only"},
|
87 |
-
{"type": "image_url", "image_url": {"url": img_url}}
|
88 |
-
]
|
89 |
-
}
|
90 |
-
],
|
91 |
-
temperature=1,
|
92 |
-
max_completion_tokens=20,
|
93 |
-
top_p=1,
|
94 |
-
stream=False,
|
95 |
-
stop=None,
|
96 |
-
)
|
97 |
-
emotion = completion.choices[0].message.content.strip().lower()
|
98 |
-
if emotion not in ["happy", "sad", "anxious", "angry"]:
|
99 |
-
return "neutral"
|
100 |
-
return emotion
|
101 |
except Exception as e:
|
102 |
-
|
103 |
-
return "
|
104 |
|
105 |
def generate_response(user_input, emotion):
|
106 |
prompt = f"The user is feeling {emotion}. They said: '{user_input}'. Respond in a friendly caring manner with the user so the user feels being loved."
|
@@ -109,24 +68,23 @@ def generate_response(user_input, emotion):
|
|
109 |
model="llama3-70b-8192",
|
110 |
messages=[{"role": "user", "content": prompt}],
|
111 |
temperature=1,
|
112 |
-
|
113 |
top_p=1,
|
114 |
stream=False,
|
115 |
-
stop=None,
|
116 |
)
|
117 |
return completion.choices[0].message.content
|
118 |
except Exception as e:
|
119 |
-
|
|
|
120 |
|
121 |
def text_to_speech(text):
|
122 |
try:
|
123 |
tts = gTTS(text=text, lang='en', slow=False)
|
124 |
-
# Create a temporary file to store the audio
|
125 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
|
126 |
tts.save(temp_audio.name)
|
127 |
return temp_audio.name
|
128 |
except Exception as e:
|
129 |
-
|
130 |
return None
|
131 |
|
132 |
# Chat function for Gradio with voice output
|
@@ -138,45 +96,36 @@ def chat_function(input_type, text_input, audio_input, chat_history):
|
|
138 |
else:
|
139 |
return chat_history, "Please provide text or voice input.", gr.update(value=text_input), None
|
140 |
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
else:
|
146 |
-
facial_emotion = "neutral"
|
147 |
-
|
148 |
-
emotions = [e for e in [text_emotion, facial_emotion] if e and e != "neutral"]
|
149 |
-
combined_emotion = emotions[0] if emotions else "neutral"
|
150 |
-
|
151 |
-
response = generate_response(user_input, combined_emotion)
|
152 |
chat_history.append({"role": "user", "content": user_input})
|
153 |
chat_history.append({"role": "assistant", "content": response})
|
154 |
|
155 |
audio_output = text_to_speech(response)
|
156 |
-
return chat_history, f"Detected Emotion: {
|
157 |
|
158 |
-
# Custom CSS for
|
159 |
css = """
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
}
|
179 |
-
</style>
|
180 |
"""
|
181 |
|
182 |
# Build the Gradio interface
|
@@ -184,7 +133,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as app:
|
|
184 |
gr.Markdown(
|
185 |
"""
|
186 |
# Multimodal Mental Health AI Agent
|
187 |
-
Chat with our empathetic AI designed to support you by understanding your emotions through text and
|
188 |
"""
|
189 |
)
|
190 |
|
@@ -198,7 +147,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as app:
|
|
198 |
with gr.Row(elem_classes="input-container"):
|
199 |
input_type = gr.Radio(["text", "voice"], label="Input Method", value="text")
|
200 |
text_input = gr.Textbox(label="Type Your Message", placeholder="How are you feeling today?", visible=True)
|
201 |
-
audio_input = gr.Audio(type="filepath", label="Record Your Message", visible=False)
|
202 |
submit_btn = gr.Button("Send", variant="primary")
|
203 |
clear_btn = gr.Button("Clear Chat", variant="secondary")
|
204 |
audio_output = gr.Audio(label="Assistant Response", type="filepath", interactive=False, autoplay=True)
|
@@ -223,6 +172,6 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as app:
|
|
223 |
outputs=[chatbot, emotion_display, text_input, audio_output]
|
224 |
)
|
225 |
|
226 |
-
# Launch the app (for
|
227 |
-
if __name__ == "__main__":
|
228 |
-
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
|
|
3 |
import speech_recognition as sr
|
4 |
from groq import Groq
|
5 |
import os
|
|
|
|
|
|
|
|
|
6 |
import tempfile
|
7 |
+
from gtts import gTTS
|
8 |
+
import logging
|
9 |
+
|
10 |
+
# Set up logging
|
11 |
+
logging.basicConfig(level=logging.INFO)
|
12 |
+
logger = logging.getLogger(__name__)
|
13 |
|
14 |
+
# Set device (CPU only for Hugging Face Spaces free tier)
|
15 |
+
device = torch.device("cpu")
|
16 |
+
logger.info(f"Using device: {device}")
|
17 |
|
18 |
+
# Groq API client with API key from Hugging Face Secrets
|
19 |
+
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
20 |
+
if not GROQ_API_KEY:
|
21 |
+
logger.error("GROQ_API_KEY environment variable not set")
|
22 |
+
raise ValueError("GROQ_API_KEY environment variable not set")
|
23 |
|
|
|
|
|
24 |
try:
|
25 |
client = Groq(api_key=GROQ_API_KEY)
|
26 |
+
logger.info("Grok client initialized successfully")
|
27 |
except Exception as e:
|
28 |
+
logger.error(f"Error initializing Groq client: {str(e)}")
|
29 |
raise
|
30 |
|
31 |
# Functions
|
|
|
36 |
model="llama3-70b-8192",
|
37 |
messages=[{"role": "user", "content": prompt}],
|
38 |
temperature=1,
|
39 |
+
max_tokens=64,
|
40 |
top_p=1,
|
41 |
stream=False,
|
|
|
42 |
)
|
43 |
+
return completion.choices[0].message.content.strip().lower()
|
44 |
except Exception as e:
|
45 |
+
logger.error(f"Error with Groq API (text emotion): {str(e)}")
|
46 |
+
return "neutral"
|
47 |
|
48 |
def transcribe_audio(audio_path):
|
49 |
r = sr.Recognizer()
|
|
|
|
|
50 |
try:
|
51 |
+
with sr.AudioFile(audio_path) as source:
|
52 |
+
audio_text = r.listen(source)
|
53 |
text = r.recognize_google(audio_text)
|
54 |
return text
|
55 |
except sr.UnknownValueError:
|
56 |
return "I didn’t catch that—could you try again?"
|
57 |
+
except sr.RequestError as e:
|
58 |
+
logger.error(f"Speech recognition error: {str(e)}")
|
59 |
return "Speech recognition unavailable—try typing instead."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
except Exception as e:
|
61 |
+
logger.error(f"Unexpected error in audio transcription: {str(e)}")
|
62 |
+
return "Error processing audio."
|
63 |
|
64 |
def generate_response(user_input, emotion):
|
65 |
prompt = f"The user is feeling {emotion}. They said: '{user_input}'. Respond in a friendly caring manner with the user so the user feels being loved."
|
|
|
68 |
model="llama3-70b-8192",
|
69 |
messages=[{"role": "user", "content": prompt}],
|
70 |
temperature=1,
|
71 |
+
max_tokens=64,
|
72 |
top_p=1,
|
73 |
stream=False,
|
|
|
74 |
)
|
75 |
return completion.choices[0].message.content
|
76 |
except Exception as e:
|
77 |
+
logger.error(f"Error with Groq API (response generation): {str(e)}")
|
78 |
+
return "I'm here for you, but something went wrong. How can I help?"
|
79 |
|
80 |
def text_to_speech(text):
|
81 |
try:
|
82 |
tts = gTTS(text=text, lang='en', slow=False)
|
|
|
83 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
|
84 |
tts.save(temp_audio.name)
|
85 |
return temp_audio.name
|
86 |
except Exception as e:
|
87 |
+
logger.error(f"Error generating speech: {str(e)}")
|
88 |
return None
|
89 |
|
90 |
# Chat function for Gradio with voice output
|
|
|
96 |
else:
|
97 |
return chat_history, "Please provide text or voice input.", gr.update(value=text_input), None
|
98 |
|
99 |
+
emotion = predict_text_emotion(user_input)
|
100 |
+
response = generate_response(user_input, emotion)
|
101 |
+
|
102 |
+
chat_history = chat_history or []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
chat_history.append({"role": "user", "content": user_input})
|
104 |
chat_history.append({"role": "assistant", "content": response})
|
105 |
|
106 |
audio_output = text_to_speech(response)
|
107 |
+
return chat_history, f"Detected Emotion: {emotion}", "", audio_output
|
108 |
|
109 |
+
# Custom CSS for styling
|
110 |
css = """
|
111 |
+
.chatbot .message-user {
|
112 |
+
background-color: #e3f2fd;
|
113 |
+
border-radius: 10px;
|
114 |
+
padding: 10px;
|
115 |
+
margin: 5px 0;
|
116 |
+
}
|
117 |
+
.chatbot .message-assistant {
|
118 |
+
background-color: #c8e6c9;
|
119 |
+
border-radius: 10px;
|
120 |
+
padding: 10px;
|
121 |
+
margin: 5px 0;
|
122 |
+
}
|
123 |
+
.input-container {
|
124 |
+
padding: 10px;
|
125 |
+
background-color: #f9f9f9;
|
126 |
+
border-radius: 10px;
|
127 |
+
margin-top: 10px;
|
128 |
+
}
|
|
|
|
|
129 |
"""
|
130 |
|
131 |
# Build the Gradio interface
|
|
|
133 |
gr.Markdown(
|
134 |
"""
|
135 |
# Multimodal Mental Health AI Agent
|
136 |
+
Chat with our empathetic AI designed to support you by understanding your emotions through text and voice.
|
137 |
"""
|
138 |
)
|
139 |
|
|
|
147 |
with gr.Row(elem_classes="input-container"):
|
148 |
input_type = gr.Radio(["text", "voice"], label="Input Method", value="text")
|
149 |
text_input = gr.Textbox(label="Type Your Message", placeholder="How are you feeling today?", visible=True)
|
150 |
+
audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Record Your Message", visible=False)
|
151 |
submit_btn = gr.Button("Send", variant="primary")
|
152 |
clear_btn = gr.Button("Clear Chat", variant="secondary")
|
153 |
audio_output = gr.Audio(label="Assistant Response", type="filepath", interactive=False, autoplay=True)
|
|
|
172 |
outputs=[chatbot, emotion_display, text_input, audio_output]
|
173 |
)
|
174 |
|
175 |
+
# Launch the app (commented out for Hugging Face Spaces)
|
176 |
+
# if __name__ == "__main__":
|
177 |
+
# app.launch(server_name="0.0.0.0", server_port=7860)
|