import gradio as gr
import numpy as np
import cv2
import librosa
import moviepy.editor as mp
import speech_recognition as sr
import tempfile
import wave
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.models import load_model, model_from_json
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.preprocessing.sequence import pad_sequences
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import pickle
import json
from transformers import pipeline

nltk.download('punkt')      # Tokenizer
nltk.download('wordnet')    # WordNet lemmatizer
nltk.download('stopwords')  # Stopwords

# Load text emotion model (architecture from JSON, weights from the .keras file)
with open('model_architecture_for_text_emotion_updated_json.json', 'r') as json_file:
    model_json = json_file.read()
text_model = model_from_json(model_json)
text_model.load_weights("model_for_text_emotion_updated(1).keras")

# Load the tokenizer fitted during training
with open('tokenizer.json') as json_file:
    tokenizer_json = json.load(json_file)
    tokenizer = tokenizer_from_json(tokenizer_json)

# Load LLM chatbot (replace with Llama or another LLM of your choice).
# BlenderBot is a seq2seq model, so it needs the "text2text-generation" task
# rather than "text-generation", which only supports causal LMs.
chatbot = pipeline("text2text-generation", model="facebook/blenderbot-3B")

# Initialize NLTK helpers
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))


# Lowercase, tokenize, drop stopwords/punctuation, and lemmatize
def preprocess_text(text):
    tokens = nltk.word_tokenize(text.lower())
    tokens = [word for word in tokens if word.isalnum() and word not in stop_words]
    lemmatized_tokens = [lemmatizer.lemmatize(word) for word in tokens]
    return ' '.join(lemmatized_tokens)


# Transcribe audio and predict its emotion
def transcribe_audio(audio_file):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio_record = recognizer.record(source)
    text = recognizer.recognize_google(audio_record)

    # Preprocess and pad to the 35-token length the text model was trained on
    pre_text = preprocess_text(text)
    title_seq = tokenizer.texts_to_sequences([pre_text])
    padded_title_seq = pad_sequences(title_seq, maxlen=35, padding='post', truncating='post')
    inp1 = np.array(padded_title_seq)

    text_prediction = text_model.predict(inp1)
    mapping = {0: "anger", 1: "disgust", 2: "fear", 3: "joy", 4: "neutral", 5: "sadness", 6: "surprise"}
    max_index = text_prediction.argmax()
    return text, mapping[max_index]


# Chatbot response: transcribe, detect emotion, then generate a reply
def chatbot_response(audio_file):
    user_input, emotion = transcribe_audio(audio_file)
    response = chatbot(user_input, max_length=100, num_return_sequences=1)[0]['generated_text']
    return f"Detected Emotion: {emotion}\nChatbot: {response}"


# Create Gradio interface
iface = gr.Interface(
    fn=chatbot_response,
    inputs=gr.Audio(source="microphone", type="filepath"),  # Gradio 3.x syntax; Gradio 4+ uses sources=["microphone"]
    outputs="text",
    title="Emotion-Aware AI Chatbot",
    description="Speak into the microphone, and the chatbot will analyze your emotion and respond accordingly."
)
iface.launch()
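
# --- Optional local smoke test (a minimal sketch, not part of the app) ---
# Assumes a local 16-bit PCM WAV recording named "sample.wav" (hypothetical path);
# run these lines instead of iface.launch() to exercise the pipeline without the Gradio UI:
#
#     text, emotion = transcribe_audio("sample.wav")
#     print("Transcript:", text)
#     print("Emotion:", emotion)
#     print(chatbot_response("sample.wav"))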