# new_space/app.py
import gradio as gr
import numpy as np
import cv2
import librosa
import moviepy.editor as mp
import speech_recognition as sr
import tempfile
import wave
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.models import load_model, model_from_json
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.preprocessing.sequence import pad_sequences
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import pickle
import json
from transformers import pipeline
nltk.download('punkt') # Tokenizer
nltk.download('wordnet') # WordNet lemmatizer
nltk.download('stopwords') # Stopwords
# Load text emotion model
with open('model_architecture_for_text_emotion_updated_json.json', 'r') as json_file:
    model_json = json_file.read()
text_model = model_from_json(model_json)
text_model.load_weights("model_for_text_emotion_updated(1).keras")
# Load tokenizer
with open('tokenizer.json') as json_file:
    tokenizer_json = json.load(json_file)
tokenizer = tokenizer_from_json(tokenizer_json)
# Load LLM chatbot (replace with Llama or another LLM of your choice)
# BlenderBot is a sequence-to-sequence model, so it runs under the text2text-generation task
chatbot = pipeline("text2text-generation", model="facebook/blenderbot-3B")
# Initialize NLTK
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))
def preprocess_text(text):
    # Lowercase and tokenize, keep alphanumeric non-stopword tokens, then lemmatize
    tokens = nltk.word_tokenize(text.lower())
    tokens = [word for word in tokens if word.isalnum() and word not in stop_words]
    lemmatized_tokens = [lemmatizer.lemmatize(word) for word in tokens]
    return ' '.join(lemmatized_tokens)
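# Illustrative example (hypothetical input; exact output can vary with the NLTK data version):
#   preprocess_text("I am feeling really happy today!")  # -> "feeling really happy today"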
# Transcribe audio and get emotion
def transcribe_audio(audio_file):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio_record = recognizer.record(source)
    # Speech-to-text via the Google Web Speech API (may raise sr.UnknownValueError if nothing is recognized)
    text = recognizer.recognize_google(audio_record)
    # Clean the transcript and run it through the text-emotion model
    pre_text = preprocess_text(text)
    title_seq = tokenizer.texts_to_sequences([pre_text])
    padded_title_seq = pad_sequences(title_seq, maxlen=35, padding='post', truncating='post')
    inp1 = np.array(padded_title_seq)
    text_prediction = text_model.predict(inp1)  # shape (1, 7): one score per emotion class
    mapping = {0: "anger", 1: "disgust", 2: "fear", 3: "joy", 4: "neutral", 5: "sadness", 6: "surprise"}
    max_index = text_prediction.argmax()
    return text, mapping[max_index]
# Chatbot response
def chatbot_response(audio_file):
    user_input, emotion = transcribe_audio(audio_file)
    response = chatbot(user_input, max_length=100, num_return_sequences=1)[0]['generated_text']
    return f"Detected Emotion: {emotion}\nChatbot: {response}"
# Create Gradio Interface
iface = gr.Interface(
    fn=chatbot_response,
    inputs=gr.Audio(source="microphone", type="filepath"),  # Gradio 3.x API; Gradio 4+ uses sources=["microphone"]
    outputs="text",
    title="Emotion-Aware AI Chatbot",
    description="Speak into the microphone, and the chatbot will analyze your emotion and respond accordingly."
)

iface.launch()