import gradio as gr
import numpy as np
import speech_recognition as sr
import json
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.models import model_from_json
from tensorflow.keras.preprocessing.sequence import pad_sequences
from transformers import pipeline
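# Assumed runtime dependencies (not pinned in the source): gradio, numpy, tensorflow,
# SpeechRecognition, nltk, transformers (plus torch as the backend for the BlenderBot pipeline)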

nltk.download('punkt')       # tokenizer models
nltk.download('punkt_tab')   # required by word_tokenize on newer NLTK releases (3.9+)
nltk.download('wordnet')     # WordNet lemmatizer data
nltk.download('stopwords')   # stopword lists

# Load text emotion model
with open('model_architecture_for_text_emotion_updated_json.json', 'r') as json_file:
    model_json = json_file.read()
text_model = model_from_json(model_json)
text_model.load_weights("model_for_text_emotion_updated(1).keras")

# Load tokenizer
with open('tokenizer.json') as json_file:
    tokenizer_data = json.load(json_file)
# tokenizer_from_json expects a JSON string; re-serialize if the file holds a plain dict
if isinstance(tokenizer_data, dict):
    tokenizer_data = json.dumps(tokenizer_data)
tokenizer = tokenizer_from_json(tokenizer_data)

# Load the LLM chatbot (swap in Llama or another LLM of your choice).
# BlenderBot is an encoder-decoder model, so it needs the text2text-generation
# task; the text-generation pipeline only accepts causal (decoder-only) LMs.
chatbot = pipeline("text2text-generation", model="facebook/blenderbot-3B")

# Initialize NLTK
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    tokens = nltk.word_tokenize(text.lower())
    tokens = [word for word in tokens if word.isalnum() and word not in stop_words]
    lemmatized_tokens = [lemmatizer.lemmatize(word) for word in tokens]
    return ' '.join(lemmatized_tokens)
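
# Example: preprocess_text("I am feeling great today!") -> "feeling great today"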

# Transcribe speech via Google's recognizer, then classify the transcript's emotion
def transcribe_audio(audio_file):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio_record = recognizer.record(source)
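        # recognize_google sends the audio to Google's web API; it raises
        # sr.UnknownValueError or sr.RequestError on failure (network required)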
        text = recognizer.recognize_google(audio_record)
    
    pre_text = preprocess_text(text)
    title_seq = tokenizer.texts_to_sequences([pre_text])
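    # maxlen=35 must match the sequence length used when training the text model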
    padded_title_seq = pad_sequences(title_seq, maxlen=35, padding='post', truncating='post')
    inp1 = np.array(padded_title_seq)
    text_prediction = text_model.predict(inp1)
    
    # Index-to-label mapping; the order must match the labels the text model was trained on
    mapping = {0: "anger", 1: "disgust", 2: "fear", 3: "joy", 4: "neutral", 5: "sadness", 6: "surprise"}
    max_index = text_prediction.argmax()
    return text, mapping[max_index]

# Generate a chatbot reply to the transcribed speech, prefixed with the detected emotion
def chatbot_response(audio_file):
    user_input, emotion = transcribe_audio(audio_file)
    response = chatbot(user_input, max_length=100, num_return_sequences=1)[0]['generated_text']
    return f"Detected Emotion: {emotion}\nChatbot: {response}"

# Create Gradio Interface
iface = gr.Interface(
    fn=chatbot_response,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),  # Gradio 4.x API; 3.x used source="microphone"
    outputs="text",
    title="Emotion-Aware AI Chatbot",
    description="Speak into the microphone, and the chatbot will analyze your emotion and respond accordingly."
)

iface.launch()
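
# Tip: iface.launch(share=True) also exposes a temporary public URL when running locally.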