welcometoFightclub committed
Commit 90a9f0d · verified · 1 Parent(s): ca14298

Upload 2 files

Files changed (2)
  1. app.py +227 -0
  2. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,227 @@
+ import gradio as gr
+ import torch
+ import cv2
+ import speech_recognition as sr
+ from groq import Groq
+ import os
+ import time
+ import base64
+ from io import BytesIO
+ from gtts import gTTS
+
+ # Set device
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+ print(f"Using device: {device}")
+
+ # Clear GPU memory if using GPU
+ if torch.cuda.is_available():
+     torch.cuda.empty_cache()
+
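+ # Note: `device` is defined but never used below; all model inference goes through
+ # the Groq API rather than a locally loaded model, so this block mainly reports
+ # whether CUDA is available and frees any cached GPU memory.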
+ # Groq API client with API key (stored as an environment variable for security)
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY", "YOUR_GROQ_API_KEY")  # Set the env variable or replace the placeholder with your key
+ try:
+     client = Groq(api_key=GROQ_API_KEY)
+     print("Groq client initialized successfully")
+ except Exception as e:
+     print(f"Error initializing Groq client: {str(e)}")
+     raise
+
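+ # Note: a valid Groq API key is required at startup; on a hosted deployment the
+ # GROQ_API_KEY environment variable would typically be configured as a secret
+ # (assumption about the deployment setup) rather than hard-coded in the file.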
+ # Functions
+ def predict_text_emotion(text):
+     prompt = f"The user has entered the text '{text}'. Classify the user's emotion as happy, sad, anxious, or angry. Respond in only one word."
+     try:
+         completion = client.chat.completions.create(
+             model="llama-3.2-90b-vision-preview",
+             messages=[{"role": "user", "content": prompt}],
+             temperature=1,
+             max_completion_tokens=64,
+             top_p=1,
+             stream=False,
+             stop=None,
+         )
+         return completion.choices[0].message.content
+     except Exception as e:
+         return f"Error with Groq API: {str(e)}"
+
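+ # Illustrative example (not part of the original code):
+ #   predict_text_emotion("I finally got the job!")  ->  expected to return a single word such as "happy"
+ # The raw model output is returned as-is; no casing or punctuation normalization happens here.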
+ def transcribe_audio(audio_path):
+     r = sr.Recognizer()
+     with sr.AudioFile(audio_path) as source:
+         audio_text = r.listen(source)
+         try:
+             text = r.recognize_google(audio_text)
+             return text
+         except sr.UnknownValueError:
+             return "I didn’t catch that—could you try again?"
+         except sr.RequestError:
+             return "Speech recognition unavailable—try typing instead."
+
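+ # Note: recognize_google uses Google's free Web Speech API, so transcription needs
+ # network access. sr.AudioFile reads WAV/AIFF/FLAC files, which should match the
+ # recording saved by the gr.Audio(type="filepath") input, though the exact format
+ # can vary by Gradio version (assumption).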
+ def capture_webcam_frame():
+     cap = cv2.VideoCapture(0)
+     if not cap.isOpened():
+         return None
+     start_time = time.time()
+     while time.time() - start_time < 2:
+         ret, frame = cap.read()
+         if ret:
+             _, buffer = cv2.imencode('.jpg', frame)
+             img_base64 = base64.b64encode(buffer).decode('utf-8')
+             img_url = f"data:image/jpeg;base64,{img_base64}"
+             cap.release()
+             return img_url
+     cap.release()
+     return None
+
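+ # Note: cv2.VideoCapture(0) opens a camera attached to the machine running the app.
+ # On a hosted server (e.g. a Hugging Face Space) there is usually no webcam, so this
+ # returns None and detect_facial_emotion() falls back to "neutral".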
+ def detect_facial_emotion():
+     img_url = capture_webcam_frame()
+     if not img_url:
+         return "neutral"
+     try:
+         completion = client.chat.completions.create(
+             model="llama-3.2-90b-vision-preview",
+             messages=[
+                 {
+                     "role": "user",
+                     "content": [
+                         {"type": "text", "text": "Identify the user's facial emotion as happy, sad, anxious, or angry. Respond in one word only."},
+                         {"type": "image_url", "image_url": {"url": img_url}}
+                     ]
+                 }
+             ],
+             temperature=1,
+             max_completion_tokens=20,
+             top_p=1,
+             stream=False,
+             stop=None,
+         )
+         emotion = completion.choices[0].message.content.strip().lower()
+         if emotion not in ["happy", "sad", "anxious", "angry"]:
+             return "neutral"
+         return emotion
+     except Exception as e:
+         print(f"Error with Groq facial detection: {str(e)}")
+         return "neutral"
+
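+ # Note: the webcam frame is passed to the vision model as a base64 data URL in an
+ # "image_url" content part; any response outside the four expected labels is
+ # coerced to "neutral".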
+ def generate_response(user_input, emotion):
+     prompt = f"The user is feeling {emotion}. They said: '{user_input}'. Respond in a friendly, caring manner so the user feels loved."
+     try:
+         completion = client.chat.completions.create(
+             model="llama-3.2-90b-vision-preview",
+             messages=[{"role": "user", "content": prompt}],
+             temperature=1,
+             max_completion_tokens=64,
+             top_p=1,
+             stream=False,
+             stop=None,
+         )
+         return completion.choices[0].message.content
+     except Exception as e:
+         return f"Error with Groq API: {str(e)}"
+
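+ # Note: the detected emotion is folded into the prompt so the reply can acknowledge
+ # how the user feels; max_completion_tokens=64 keeps replies short.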
+ def text_to_speech(text):
+     try:
+         tts = gTTS(text=text, lang='en', slow=False)
+         # gr.Audio(type="filepath") expects a file path, so save the MP3 to disk
+         # and return its path rather than an in-memory buffer.
+         audio_path = "response.mp3"
+         tts.save(audio_path)
+         return audio_path
+     except Exception as e:
+         print(f"Error generating speech: {str(e)}")
+         return None
+
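+ # Note: gTTS also calls an online Google endpoint, so speech synthesis needs network
+ # access; the returned path feeds the autoplaying gr.Audio output defined below.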
+ # Chat function for Gradio with voice output
+ def chat_function(input_type, text_input, audio_input, chat_history):
+     if input_type == "text" and text_input:
+         user_input = text_input
+     elif input_type == "voice" and audio_input:
+         user_input = transcribe_audio(audio_input)
+     else:
+         return chat_history, "Please provide text or voice input.", gr.update(value=text_input), None
+
+     text_emotion = predict_text_emotion(user_input)
+     if not chat_history:
+         gr.Info("Please look at the camera for emotion detection...")
+         facial_emotion = detect_facial_emotion()
+     else:
+         facial_emotion = "neutral"
+
+     emotions = [e for e in [text_emotion, facial_emotion] if e and e != "neutral"]
+     combined_emotion = emotions[0] if emotions else "neutral"
+
+     response = generate_response(user_input, combined_emotion)
+     chat_history.append({"role": "user", "content": user_input})
+     chat_history.append({"role": "assistant", "content": response})
+
+     audio_output = text_to_speech(response)
+     return chat_history, f"Detected Emotion: {combined_emotion}", "", audio_output
+
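+ # Flow: read text or transcribe voice -> classify the text emotion -> on the first
+ # turn only, also try facial emotion from the webcam -> use the first non-neutral
+ # signal (text takes precedence) -> generate the reply and synthesize speech.
+ # History entries use the {"role": ..., "content": ...} dicts expected by
+ # gr.Chatbot(type="messages").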
+ # Custom CSS for better styling (Gradio's css= parameter takes raw CSS, without <style> tags)
+ css = """
+ .chatbot .message-user {
+     background-color: #e3f2fd;
+     border-radius: 10px;
+     padding: 10px;
+     margin: 5px 0;
+ }
+ .chatbot .message-assistant {
+     background-color: #c8e6c9;
+     border-radius: 10px;
+     padding: 10px;
+     margin: 5px 0;
+ }
+ .input-container {
+     padding: 10px;
+     background-color: #f9f9f9;
+     border-radius: 10px;
+     margin-top: 10px;
+ }
+ """
+
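+ # Note: .chatbot and .input-container hook into the elem_classes set on the
+ # components below; the .message-user / .message-assistant selectors assume a
+ # particular internal Chatbot markup and may need adjusting across Gradio versions
+ # (assumption).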
+ # Build the Gradio interface
+ with gr.Blocks(theme=gr.themes.Soft(), css=css) as app:
+     gr.Markdown(
+         """
+         # Multimodal Mental Health AI Agent
+         Chat with our empathetic AI designed to support you by understanding your emotions through text and facial expressions.
+         """
+     )
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             emotion_display = gr.Textbox(label="Emotion", interactive=False, placeholder="Detected emotion will appear here")
+
+         with gr.Column(scale=3):
+             chatbot = gr.Chatbot(label="Conversation History", height=500, type="messages", elem_classes="chatbot")
+
+     with gr.Row(elem_classes="input-container"):
+         input_type = gr.Radio(["text", "voice"], label="Input Method", value="text")
+         text_input = gr.Textbox(label="Type Your Message", placeholder="How are you feeling today?", visible=True)
+         audio_input = gr.Audio(type="filepath", label="Record Your Message", visible=False)
+         submit_btn = gr.Button("Send", variant="primary")
+         clear_btn = gr.Button("Clear Chat", variant="secondary")
+         audio_output = gr.Audio(label="Assistant Response", type="filepath", interactive=False, autoplay=True)
+
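+     # Note: gr.Audio serves two roles here: microphone input (audio_input, hidden until
+     # "voice" is selected) and playback of the assistant's spoken reply (audio_output,
+     # autoplay=True, output-only via interactive=False).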
+     # Dynamic visibility based on input type
+     def update_visibility(input_type):
+         return gr.update(visible=input_type == "text"), gr.update(visible=input_type == "voice")
+
+     input_type.change(fn=update_visibility, inputs=input_type, outputs=[text_input, audio_input])
+
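+     # Note: whenever the radio selection changes, gr.update(visible=...) swaps which
+     # of the two input widgets is shown.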
+     # Submit action with voice output
+     submit_btn.click(
+         fn=chat_function,
+         inputs=[input_type, text_input, audio_input, chatbot],
+         outputs=[chatbot, emotion_display, text_input, audio_output]
+     )
+
+     # Clear chat and audio
+     clear_btn.click(
+         lambda: ([], "", "", None),
+         inputs=None,
+         outputs=[chatbot, emotion_display, text_input, audio_output]
+     )
+
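+     # Note: the outputs list order (chatbot, emotion_display, text_input, audio_output)
+     # matches the 4-tuple returned by chat_function and by the clear lambda.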
+ # Launch the app (for local testing; deployment will handle this differently)
+ if __name__ == "__main__":
+     app.launch(server_name="0.0.0.0", server_port=7860)
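+ # Note: binding to 0.0.0.0 on port 7860 matches what containerized hosts such as
+ # Hugging Face Spaces expect; for a purely local run, app.launch() with its defaults
+ # would also work.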
requirements.txt ADDED
File without changes