File size: 11,190 Bytes
ebf88d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a70449
ebf88d6
 
 
 
 
3a70449
 
 
ebf88d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a70449
 
 
 
 
 
 
 
 
ebf88d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
from flask import Flask, request, jsonify
import pymongo
from routes.auth_routes import auth_bp
from flask_cors import CORS
import os
from werkzeug.utils import secure_filename
from utils.audioextraction import extract_audio
from utils.expressions import analyze_video_emotions
from utils.transcription import speech_to_text_long
from utils.vocals import predict_emotion
from utils.vocabulary import evaluate_vocabulary
from groq import Groq
import pandas as pd
from bson import ObjectId
import json
from dotenv import load_dotenv
from datetime import datetime
from utils.models import load_models

# Pull variables from a local .env file into the process environment before
# anything below reads os.environ.
load_dotenv()
app = Flask(__name__)
CORS(app)

# Load models on startup
# The returned mapping is indexed by the request handlers with the keys
# "fer", "whisper", "emotion_model", "emotion_feature_extractor" and
# "emotion_id2label".
models = load_models()

# MongoDB connection
# SECURITY: the connection string embeds credentials. Prefer supplying it via
# the MONGODB_URI environment variable (e.g. in .env, like GROQ_API_KEY); the
# literal below is kept only as a backward-compatible fallback and should be
# rotated and removed from source control.
client = pymongo.MongoClient(
    os.environ.get(
        "MONGODB_URI",
        "mongodb+srv://pmsankheb23:[email protected]/",
    )
)
db = client["Eloquence"]
collections_user = db["user"]
reports_collection = db["reports"]
overall_reports_collection = db["overall_reports"]

# Groq client setup
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Configure upload folder
UPLOAD_FOLDER = 'uploads'
ALLOWED_EXTENSIONS = {'mp4', 'webm', 'wav'}
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# exist_ok avoids the check-then-create race when several workers start at once.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

def allowed_file(filename):
    """Return True when *filename* ends in an extension from ALLOWED_EXTENSIONS.

    The extension is whatever follows the last ``.`` and is compared
    case-insensitively. A name without any ``.`` is rejected.
    """
    _stem, separator, extension = filename.rpartition('.')
    if not separator:
        # No dot at all, so there is no extension to validate.
        return False
    return extension.lower() in ALLOWED_EXTENSIONS

def convert_objectid_to_string(data):
    """Recursively replace ObjectId and datetime values with strings.

    Dicts and lists are rebuilt (the input is not mutated) with every nested
    value converted. ``ObjectId`` instances become ``str(value)`` and
    ``datetime`` instances become their ``isoformat()`` string, wherever they
    appear: as dict values, as list items, or as the top-level argument.
    Any other value is returned unchanged.
    """
    if isinstance(data, dict):
        return {key: convert_objectid_to_string(value) for key, value in data.items()}
    if isinstance(data, list):
        return [convert_objectid_to_string(item) for item in data]
    if isinstance(data, datetime):
        # Handled here rather than only in the dict branch so datetimes nested
        # in lists are converted as well.
        return data.isoformat()
    if isinstance(data, ObjectId):
        return str(data)
    return data

# Attach the routes declared on auth_bp (imported from routes.auth_routes) to
# this application; the routes themselves are defined in that module.
app.register_blueprint(auth_bp)

@app.route('/')
def home():
    """Answer requests to the root URL with a fixed plain-text greeting."""
    greeting = "Hello World"
    return greeting

def _remove_if_exists(path):
    """Delete *path*; a file that was never created is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


@app.route('/upload', methods=['POST'])
def upload_file():
    """Analyze an uploaded recording and store the resulting report.

    Expects multipart form data with:
      * ``file``    - the recording; its extension must pass ``allowed_file``.
      * ``userId``  - required owner of the report.
      * ``context`` - optional, defaults to ``''``.
      * ``title``   - optional, defaults to ``'Untitled Session'``.
      * ``mode``    - optional, defaults to ``'video'``; facial-expression
        analysis only runs when it is ``'video'``.

    Returns the stored report as JSON with status 200, a 400 JSON error for
    invalid input, or a 500 JSON error when audio extraction fails. Other
    exceptions raised while processing propagate to Flask, but the temporary
    files are removed in every case.
    """
    # Function-scope import keeps this block self-contained.
    import uuid

    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400
    file = request.files['file']
    context = request.form.get('context', '')
    title = request.form.get('title', 'Untitled Session')
    mode = request.form.get('mode', 'video')
    user_id = request.form.get('userId')

    if not user_id:
        return jsonify({"error": "User ID is required"}), 400

    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    if not file or not allowed_file(file.filename):
        return jsonify({"error": "File type not allowed"}), 400

    # Use server-generated, per-request names. A fixed 'output.wav' and the
    # client-supplied filename would let concurrent uploads (or an upload that
    # is itself called output.wav) overwrite each other's files. Only the
    # extension, already validated against the allow-list, is kept.
    extension = file.filename.rsplit('.', 1)[1].lower()
    token = uuid.uuid4().hex
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], f"{token}.{extension}")
    audio_path = os.path.join(app.config['UPLOAD_FOLDER'], f"{token}_audio.wav")

    try:
        file.save(file_path)

        if not extract_audio(file_path, audio_path):
            return jsonify({"error": "Failed to process audio from the file"}), 500

        # Stays empty in audio-only mode; generate_scores treats an empty
        # frame as "No facial data".
        emotion_analysis = pd.DataFrame()
        if mode == "video":
            emotion_analysis = analyze_video_emotions(file_path, models["fer"])

        transcription = speech_to_text_long(audio_path, models["whisper"])
        audio_emotion = predict_emotion(
            audio_path,
            models["emotion_model"],
            models["emotion_feature_extractor"],
            models["emotion_id2label"],
        )
        vocabulary_report = evaluate_vocabulary(transcription, context)
        scores = generate_scores(transcription, audio_emotion, emotion_analysis)
        speech_report = generate_speech_report(transcription, context, audio_emotion)
        expression_report = generate_expression_report(emotion_analysis) if mode == "video" else "No expression analysis for audio-only mode."

        report_data = {
            "userId": user_id,
            "title": title,
            "context": context,
            "transcription": transcription,
            "vocabulary_report": vocabulary_report,
            "speech_report": speech_report,
            "expression_report": expression_report,
            "scores": scores,
            "createdAt": datetime.utcnow()
        }

        result = reports_collection.insert_one(report_data)
        report_data["_id"] = str(result.inserted_id)
        # Recompute the per-user aggregate now that a new report exists.
        update_overall_reports(user_id)

        return jsonify(convert_objectid_to_string(report_data)), 200
    finally:
        # Runs on success, on the early 500 return and on any exception, so
        # temporary media never accumulates in the upload folder.
        _remove_if_exists(file_path)
        _remove_if_exists(audio_path)

@app.route('/chat', methods=['POST'])
def chat():
    """Answer a user's message with coaching advice informed by past reports.

    Expects a JSON object with ``userId`` and ``message``. Returns
    ``{"response": <text>}`` with status 200, a 400 JSON error when the body
    is missing, is not a JSON object, or lacks either field, and a 500 JSON
    error when the database lookup or the Groq call fails.
    """
    # silent=True returns None for a missing or malformed body instead of
    # raising, so bad input is reported as 400 rather than as a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    user_id = data.get('userId')
    user_message = data.get('message')

    if not user_id or not user_message:
        return jsonify({"error": "User ID and message are required"}), 400

    try:
        summary_parts = ["Here is a summary of the user's past performance:\n"]
        for report in reports_collection.find({"userId": user_id}):
            # Stored scores originate from LLM output and may be absent or
            # malformed; fall back to 'N/A' instead of raising KeyError.
            scores = report.get('scores')
            if not isinstance(scores, dict):
                scores = {}
            summary_parts.append(
                f"- Session '{report.get('title', 'Untitled')}':\n"
                f"  - Vocabulary Score: {scores.get('vocabulary', 'N/A')}\n"
                f"  - Voice Score: {scores.get('voice', 'N/A')}\n"
                f"  - Expressions Score: {scores.get('expressions', 'N/A')}\n"
                f"  - Feedback: {report.get('speech_report', '')}\n\n"
            )
        reports_summary = "".join(summary_parts)

        system_message = f"""
        You are 'Eloquence AI', a friendly and expert public speaking coach. Your goal is to help users improve their speaking skills by providing constructive, encouraging, and actionable feedback.

        User's Past Performance Summary:
        {reports_summary}

        Based on this history and the user's current message, provide a helpful and encouraging response. Be conversational and supportive.
        """

        chat_completion = groq_client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": user_message},
            ],
            model="llama3-70b-8192",
        )

        ai_response = chat_completion.choices[0].message.content
        return jsonify({"response": ai_response}), 200

    except Exception:
        # Request boundary: record the traceback, return a generic error.
        app.logger.exception("Error in /chat endpoint")
        return jsonify({"error": "An internal error occurred"}), 500

def _average_score(reports, key):
    """Return the mean of ``report["scores"][key]`` over a non-empty list.

    A report whose scores are missing, not a dict, or non-numeric for *key*
    contributes 0 instead of raising.
    """
    total = 0
    for report in reports:
        scores = report.get("scores")
        value = scores.get(key) if isinstance(scores, dict) else None
        # bool is a subclass of int; do not count True/False as a score.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            total += value
    return total / len(reports)


def update_overall_reports(user_id):
    """Recompute and upsert the aggregate report for *user_id*.

    Loads every report stored for the user, averages the vocabulary, voice
    and expressions scores, generates the textual summaries through
    ``generate_overall_reports`` and writes the result to
    ``overall_reports_collection`` (inserting the document when none exists).
    Does nothing when the user has no reports.
    """
    user_reports = list(reports_collection.find({"userId": user_id}))
    if not user_reports:
        return

    overall_report_data = {
        "userId": user_id,
        "avg_vocabulary": _average_score(user_reports, "vocabulary"),
        "avg_voice": _average_score(user_reports, "voice"),
        "avg_expressions": _average_score(user_reports, "expressions"),
        "overall_reports": generate_overall_reports(user_reports),
    }

    overall_reports_collection.update_one(
        {"userId": user_id},
        {"$set": overall_report_data},
        upsert=True
    )

@app.route('/user-reports-list', methods=['GET'])
def get_user_reports_list():
    """Return every stored report for the ``userId`` query parameter.

    Responds with a JSON array (empty when the user has no reports) and
    status 200, or a 400 JSON error when ``userId`` is not supplied.
    """
    requested_user = request.args.get('userId')
    if requested_user:
        documents = list(reports_collection.find({"userId": requested_user}))
        payload = convert_objectid_to_string(documents) if documents else []
        return jsonify(payload), 200
    return jsonify({"error": "User ID is required"}), 400

@app.route('/user-reports', methods=['GET'])
def get_user_reports():
    """Return the aggregate report for the ``userId`` query parameter.

    Responds with the stored document as JSON and status 200, a 400 JSON
    error when ``userId`` is not supplied, or a 404 JSON error when no
    aggregate report exists for that user.
    """
    owner_id = request.args.get('userId')
    if not owner_id:
        body, status = {"error": "User ID is required"}, 400
    else:
        summary = overall_reports_collection.find_one({"userId": owner_id})
        if summary:
            body, status = convert_objectid_to_string(summary), 200
        else:
            body, status = {"error": "No overall report found for the user"}, 404
    return jsonify(body), status

def generate_report(system_message, user_message):
    """Send a system/user message pair to the Groq chat API and return the reply.

    Returns the content of the first completion choice. If anything in the
    request or in reading the response raises, the error is printed and a
    fixed fallback sentence is returned instead of propagating.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
    ]
    try:
        completion = groq_client.chat.completions.create(
            messages=conversation,
            model="llama3-70b-8192",
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        print(f"Error generating report: {err}")
        return "Could not generate report due to an API error."

def generate_overall_reports(user_reports):
    """Produce one-paragraph voice, expression and vocabulary summaries.

    The reports are serialized to indented JSON once and sent to
    ``generate_report`` three times, each with a different expert persona
    and topic. Returns a dict with the keys ``voice_report``,
    ``expressions_report`` and ``vocabulary_report``.
    """
    reports_json = json.dumps(convert_objectid_to_string(user_reports), indent=2)

    # (result key, system persona, topic named in the user prompt)
    sections = (
        ("voice_report", "You are an expert in speech analysis...", "Voice"),
        ("expressions_report", "You are an expert in facial expression analysis...", "Facial Expressions"),
        ("vocabulary_report", "You are an expert in language and vocabulary analysis...", "Vocabulary"),
    )

    summaries = {}
    for field, persona, topic in sections:
        prompt = f"Reports: {reports_json}\nProvide a short one paragraph report summarizing the user's overall performance in {topic}..."
        summaries[field] = generate_report(persona, prompt)
    return summaries

def _parse_scores(report_content):
    """Extract the three numeric scores from an LLM reply.

    Accepts replies where the JSON object is surrounded by extra text or code
    fences by parsing the span from the first ``{`` to the last ``}``. Always
    returns a dict with exactly the keys ``vocabulary``, ``voice`` and
    ``expressions``; a key that is missing or non-numeric becomes 0, and an
    unparseable reply yields all zeros.
    """
    fallback = {"vocabulary": 0, "voice": 0, "expressions": 0}
    if not isinstance(report_content, str):
        return fallback

    start = report_content.find('{')
    end = report_content.rfind('}')
    if start == -1 or end < start:
        return fallback

    try:
        parsed = json.loads(report_content[start:end + 1])
    except json.JSONDecodeError:
        return fallback

    scores = {}
    for key in fallback:
        value = parsed.get(key)
        # bool is a subclass of int; do not accept True/False as a score.
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        scores[key] = value if is_number else 0
    return scores


def generate_scores(transcription, audio_emotion, emotion_analysis):
    """Ask the LLM for vocabulary, voice and expressions scores (out of 100).

    ``emotion_analysis`` is read through ``.empty`` and
    ``.to_string(index=False)``; when it is ``None`` or empty the prompt says
    "No facial data". Returns a dict with the numeric keys ``vocabulary``,
    ``voice`` and ``expressions``. The result is normalized so that code
    indexing those keys never fails on an unexpected LLM reply.
    """
    system_message = """
    You are an expert in speech analysis. Based on the provided data, generate scores (out of 100) for Vocabulary, Voice, and Expressions.
    Provide only the three scores in JSON format, like:
    {"vocabulary": 85, "voice": 78, "expressions": 90}
    """
    has_facial_data = emotion_analysis is not None and not emotion_analysis.empty
    emotion_str = emotion_analysis.to_string(index=False) if has_facial_data else "No facial data"
    user_message = f"""
    Transcription: {transcription}
    Audio Emotion Data: {audio_emotion}
    Facial Emotion Analysis: {emotion_str}
    Provide only the JSON output with numeric scores.
    """
    report_content = generate_report(system_message, user_message)
    return _parse_scores(report_content)

def generate_speech_report(transcription, context, audio_emotion):
    """Request a short report on whether the vocal emotions suit the context.

    The context and the audio emotion data are embedded in the system
    message. ``transcription`` is accepted but not used in the prompt.
    """
    instructions = "Provide a short one paragraph report on the emotional appropriateness of speech..."
    framing = 'Context: "{}". Evaluate if emotions in audio match. Emotion data: {}.'.format(
        context, audio_emotion
    )
    return generate_report(framing, instructions)

def generate_expression_report(emotion_analysis_str):
    """Request a short report on the appropriateness of facial expressions.

    The argument is interpolated into the system message as text, so any
    value with a useful string form works (the upload handler passes the
    emotion analysis result directly).
    """
    instructions = "Provide a short one paragraph report on the emotional appropriateness of facial expressions..."
    framing = "Evaluate the following emotion data: {}.".format(emotion_analysis_str)
    return generate_report(framing, instructions)

if __name__ == '__main__':
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off unless
    # explicitly requested, e.g. FLASK_DEBUG=1 in a local .env file.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true", "yes"}
    app.run(debug=debug_enabled)