|
import gradio as gr |
|
import numpy as np |
|
import cv2 |
|
import librosa |
|
import moviepy.editor as mp |
|
import speech_recognition as sr |
|
import tempfile |
|
import wave |
|
import os |
|
import tensorflow as tf |
|
from tensorflow.keras.preprocessing.text import tokenizer_from_json |
|
from tensorflow.keras.models import load_model, model_from_json |
|
from sklearn.preprocessing import StandardScaler |
|
from tensorflow.keras.preprocessing.sequence import pad_sequences |
|
import nltk |
|
from nltk.corpus import stopwords |
|
from nltk.stem import WordNetLemmatizer |
|
import pickle |
|
import json |
|
from transformers import pipeline |
|
|
|
# Fetch the NLTK resources this script relies on: tokenizer models for
# nltk.word_tokenize, WordNet for the lemmatizer, and the English stop-word
# list.
#
# 'punkt_tab' is requested alongside 'punkt' because newer NLTK releases make
# word_tokenize look up the 'punkt_tab' resource instead of the pickled
# 'punkt' models; requesting both keeps tokenisation working on either side
# of that change.
for _nltk_resource in ('punkt', 'punkt_tab', 'wordnet', 'stopwords'):
    nltk.download(_nltk_resource)
|
|
|
|
|
# Rebuild the text-emotion classifier in two steps: create the network from
# its JSON architecture description, then load the saved weights into it.
with open('model_architecture_for_text_emotion_updated_json.json', 'r') as json_file:
    model_json = json_file.read()

text_model = model_from_json(model_json)
text_model.load_weights("model_for_text_emotion_updated(1).keras")
|
|
|
|
|
# Restore the Keras tokenizer from tokenizer.json.
#
# The file is opened as UTF-8 explicitly so that vocabulary entries with
# non-ASCII characters decode the same way on every platform instead of
# depending on the locale's default encoding.
with open('tokenizer.json', encoding='utf-8') as json_file:
    tokenizer_json = json.load(json_file)

# tokenizer_from_json() parses a JSON *string*. If the file held the raw
# tokenizer configuration (so json.load produced a dict rather than a string),
# serialise it back to text before handing it over.
if not isinstance(tokenizer_json, str):
    tokenizer_json = json.dumps(tokenizer_json)

tokenizer = tokenizer_from_json(tokenizer_json)
|
|
|
|
|
# Conversational response generator.
#
# BlenderBot is an encoder-decoder (sequence-to-sequence) model, so it is
# loaded through the "text2text-generation" task. The "text-generation" task
# is meant for decoder-only causal language models and would not run the
# model's encoder over the user's message. The result keeps the same shape
# the rest of this file relies on: a list of dicts with a 'generated_text' key.
chatbot = pipeline("text2text-generation", model="facebook/blenderbot-3B")
|
|
|
|
|
# Text-normalisation resources, created once at import time and shared by
# every call to preprocess_text().
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
|
|
|
def preprocess_text(text):
    """Normalise raw text into a space-separated string of lemmas.

    The text is lower-cased and tokenised with ``nltk.word_tokenize``. Tokens
    that are not purely alphanumeric, or that appear in ``stop_words``, are
    dropped; each remaining token is lemmatised.

    Args:
        text: The raw input string.

    Returns:
        The surviving lemmas joined by single spaces (an empty string when
        no token survives the filtering).
    """
    kept_lemmas = []
    for token in nltk.word_tokenize(text.lower()):
        # Skip punctuation / mixed-symbol tokens and stop words.
        if not token.isalnum() or token in stop_words:
            continue
        kept_lemmas.append(lemmatizer.lemmatize(token))
    return ' '.join(kept_lemmas)
|
|
|
|
|
|
|
def transcribe_audio(audio_file):
    """Transcribe a recording and classify the emotion of what was said.

    The audio is transcribed with ``Recognizer.recognize_google``; the
    transcript is then cleaned with ``preprocess_text``, converted to a
    sequence padded/truncated to 35 tokens, and scored by ``text_model``.

    Args:
        audio_file: Audio source passed to ``sr.AudioFile``.

    Returns:
        A ``(text, emotion)`` tuple: the unprocessed transcript and the
        emotion label whose score is highest.

    Errors raised by the speech recogniser are not caught here and propagate
    to the caller.
    """
    speech_recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        recording = speech_recognizer.record(source)
    text = speech_recognizer.recognize_google(recording)

    # Turn the transcript into the fixed-length integer input the model takes.
    cleaned_text = preprocess_text(text)
    sequences = tokenizer.texts_to_sequences([cleaned_text])
    model_input = np.array(
        pad_sequences(sequences, maxlen=35, padding='post', truncating='post')
    )
    scores = text_model.predict(model_input)

    # Index of each model output -> emotion label.
    emotion_by_index = {
        0: "anger",
        1: "disgust",
        2: "fear",
        3: "joy",
        4: "neutral",
        5: "sadness",
        6: "surprise",
    }
    return text, emotion_by_index[scores.argmax()]
|
|
|
|
|
|
|
def chatbot_response(audio_file):
    """Produce the text shown in the UI for one spoken message.

    The recording is transcribed and its emotion classified by
    ``transcribe_audio``; the transcript is then passed to the ``chatbot``
    pipeline to generate a reply.

    Args:
        audio_file: Path of the recorded audio, or ``None``/empty when nothing
            was recorded.

    Returns:
        ``"Detected Emotion: <emotion>\\nChatbot: <reply>"`` on success, or a
        short explanatory message when there is no audio, the speech could
        not be understood, or the recognition service could not be reached.
    """
    # Nothing recorded: answer with a hint instead of failing while trying to
    # open a non-existent file.
    if not audio_file:
        return "No audio received. Please record a message and try again."

    try:
        user_input, emotion = transcribe_audio(audio_file)
    except sr.UnknownValueError:
        # The recogniser could not make out any speech in the recording.
        return "Sorry, I could not understand the audio. Please try again."
    except sr.RequestError as err:
        # The recognition service was unreachable or rejected the request.
        return f"Speech recognition service is unavailable: {err}"

    response = chatbot(user_input, max_length=100, num_return_sequences=1)[0]['generated_text']
    return f"Detected Emotion: {emotion}\nChatbot: {response}"
|
|
|
|
|
# Gradio front end: a microphone recording (delivered to the handler as a
# file path) goes in, the handler's text comes out.
microphone_input = gr.Audio(source="microphone", type="filepath")

iface = gr.Interface(
    fn=chatbot_response,
    inputs=microphone_input,
    outputs="text",
    title="Emotion-Aware AI Chatbot",
    description="Speak into the microphone, and the chatbot will analyze your emotion and respond accordingly.",
)

# Start the web app as soon as the script is executed.
iface.launch()
|
|