Garvitj committed · verified
Commit 915d8ef · 1 Parent(s): 25e2b6c

Update app.py

Files changed (1)
app.py +52 -17
app.py CHANGED
@@ -250,18 +250,44 @@ def process_audio_from_video(video_path):
 
 
 
-
-import torch
 import gradio as gr
-# Load model directly
+from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer, AutoModelForCausalLM
+from huggingface_hub import InferenceClient
+
+client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
-tokenizer = AutoTokenizer.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GPTQ")
-model = AutoModelForCausalLM.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GPTQ")
 
+def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
+    messages = [{"role": "system", "content": system_message}]
+
+    # Format history with user and bot messages
+    for val in history:
+        if val[0]:
+            messages.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
+
+    messages.append({"role": "user", "content": message})
 
+    response = ""
+
+    # Stream response from the model
+    for message in client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        token = message.choices[0].delta.content
+        response += token
+        yield response
+
+
+# Function to handle video processing and interaction
 def transcribe_and_predict_video(video, chat_history=[]):
-    # Process the video for emotions
+    # Process the video for emotions (use your own emotion detection functions)
     image_emotion = process_video(video)
     text_emotion, audio_emotion, user_input = process_audio_from_video(video)
     em = [image_emotion, text_emotion, audio_emotion]
@@ -272,12 +298,12 @@ def transcribe_and_predict_video(video, chat_history=[]):
     # Construct the prompt with emotion context and history
     prompt = f"""
    You are a helpful AI assistant. Respond like a human while considering the user's emotion.
-
+
    User's Emotion: {em}
-
+
    Conversation History:
    {history_text}
-
+
    User ({em}): {user_input}
    Bot:"""
 
@@ -288,7 +314,7 @@ def transcribe_and_predict_video(video, chat_history=[]):
     output = model.generate(**inputs, max_length=512, temperature=0.7, top_p=0.9, do_sample=True)
     response = tokenizer.decode(output[0], skip_special_tokens=True).split("Bot:")[-1].strip()
 
-    # Store the current emotion for the user input (You can modify the emotion detection based on your needs)
+    # Store the current emotion for the user input (modify emotion detection as needed)
     emotion = detect_emotion(user_input)  # Assuming `detect_emotion` is a function that returns the user's emotion
 
     # Update the chat history with the current conversation and emotion
@@ -296,13 +322,22 @@ def transcribe_and_predict_video(video, chat_history=[]):
 
     return response, chat_history
 
-# Create Gradio interface
-iface = gr.Interface(fn=transcribe_and_predict_video,
-                     inputs=gr.Video(),
-                     outputs="text",
-                     title="Multimodal Emotion Recognition from Video",
-                     description="Upload a video to get text, audio, and image emotion predictions.")
 
-iface.launch()
+# Gradio interface setup
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+    ],
+)
+
+# Launch the Gradio interface
+if __name__ == "__main__":
+    demo.launch()
+
+
 
 
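
For reference, a minimal sketch of how the new respond generator added in this commit can be driven outside Gradio, mirroring what gr.ChatInterface does on each turn. It assumes the InferenceClient above can reach the HuggingFaceH4/zephyr-7b-beta endpoint (e.g. a valid HF token is configured); the user message and sampling values are illustrative only.

# Sketch only (not part of the commit): drive respond() the way gr.ChatInterface does.
history = []  # list of (user_message, assistant_message) tuples from earlier turns
for partial in respond(
    "Hi, how are you?",             # illustrative user message
    history,
    "You are a friendly Chatbot.",  # system_message
    128,                            # max_tokens
    0.7,                            # temperature
    0.95,                           # top_p
):
    pass          # each yield is the response accumulated so far (streaming)
print(partial)    # final, complete reply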
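Note that transcribe_and_predict_video still calls model.generate and tokenizer.decode even though the local Mistral GPTQ loads were removed by this commit. A possible follow-up, sketched here and not part of the commit, would route that prompt through the same hosted client; text_generation is the huggingface_hub call for raw prompts, and the sampling values below simply mirror the existing model.generate arguments.

# Sketch only: serve the emotion-tagged prompt via the hosted endpoint instead of
# the removed local model. `prompt` and `client` are the names already used above.
generated = client.text_generation(
    prompt,
    max_new_tokens=512,
    temperature=0.7,
    top_p=0.9,
    do_sample=True,
)
response = generated.split("Bot:")[-1].strip()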