"""Flask backend for the Eloquence speaking-coach application.

Exposes endpoints to upload a recorded session for analysis, chat with an
LLM coach that sees the user's past reports, and fetch stored reports.
"""
import json
import os
import uuid
from datetime import datetime

import pymongo
import pandas as pd
from bson import ObjectId
from dotenv import load_dotenv
from flask import Flask, request, jsonify
from flask_cors import CORS
from groq import Groq
from werkzeug.utils import secure_filename

from routes.auth_routes import auth_bp
from utils.audioextraction import extract_audio
from utils.expressions import analyze_video_emotions
from utils.transcription import speech_to_text_long
from utils.vocals import predict_emotion
from utils.vocabulary import evaluate_vocabulary
from utils.models import load_models

load_dotenv()

app = Flask(__name__)
CORS(app)

# Load models on startup
models = load_models()

# MongoDB connection
# SECURITY: this fallback URI embeds credentials that are committed to source
# control. Set MONGODB_URI in the environment, rotate the password, and then
# delete the fallback. It is kept only so existing deployments keep working.
_FALLBACK_MONGODB_URI = (
    "mongodb+srv://pmsankheb23:KnjSAJM9oB1OMtud@eloquence.yal88.mongodb.net/"
)
client = pymongo.MongoClient(os.environ.get("MONGODB_URI", _FALLBACK_MONGODB_URI))
db = client["Eloquence"]
collections_user = db["user"]
reports_collection = db["reports"]
overall_reports_collection = db["overall_reports"]

# Groq client setup
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# Single source of truth for the chat model; override with GROQ_MODEL.
GROQ_MODEL = os.environ.get("GROQ_MODEL", "llama3-70b-8192")

# Configure upload folder
UPLOAD_FOLDER = 'uploads'
ALLOWED_EXTENSIONS = {'mp4', 'webm', 'wav'}
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Keys every stored "scores" document is expected to carry.
_SCORE_KEYS = ("vocabulary", "voice", "expressions")


def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


def convert_objectid_to_string(data):
    """Return a copy of *data* with ObjectId and datetime values made JSON-safe.

    Dicts and lists are walked recursively. ObjectId becomes ``str(oid)`` and
    datetime becomes its ISO-8601 string, at any nesting depth. Every other
    value is returned unchanged.
    """
    if isinstance(data, dict):
        return {key: convert_objectid_to_string(value) for key, value in data.items()}
    if isinstance(data, list):
        return [convert_objectid_to_string(item) for item in data]
    if isinstance(data, ObjectId):
        return str(data)
    if isinstance(data, datetime):
        return data.isoformat()
    return data


def _is_number(value):
    """Return True for int/float values, excluding bool."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _report_score(report, key):
    """Return the numeric score *key* of a stored report, or 0 if absent/invalid."""
    scores = report.get("scores")
    value = scores.get(key) if isinstance(scores, dict) else None
    return value if _is_number(value) else 0


def _remove_if_exists(*paths):
    """Delete each path, ignoring files that were never created."""
    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            # Best-effort cleanup: the file may not exist if processing
            # failed before it was written.
            pass


app.register_blueprint(auth_bp)


@app.route('/')
def home():
    """Liveness endpoint."""
    return "Hello World"


@app.route('/upload', methods=['POST'])
def upload_file():
    """Analyse an uploaded recording and store the resulting report.

    Expects multipart form data with a ``file`` part and a ``userId`` field;
    ``context``, ``title`` and ``mode`` are optional. Returns the stored
    report as JSON (200), or a JSON error with status 400/500.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400

    file = request.files['file']
    context = request.form.get('context', '')
    title = request.form.get('title', 'Untitled Session')
    mode = request.form.get('mode', 'video')
    user_id = request.form.get('userId')

    if not user_id:
        return jsonify({"error": "User ID is required"}), 400
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400
    if not allowed_file(file.filename):
        return jsonify({"error": "File type not allowed"}), 400

    # Per-request unique names: a shared 'output.wav' (and client-chosen file
    # names) would let concurrent uploads overwrite each other's data. The
    # extension was validated by allowed_file(), so it is safe to reuse.
    extension = file.filename.rsplit('.', 1)[1].lower()
    upload_token = uuid.uuid4().hex
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], f"{upload_token}.{extension}")
    audio_path = os.path.join(app.config['UPLOAD_FOLDER'], f"{upload_token}_audio.wav")

    try:
        file.save(file_path)

        if not extract_audio(file_path, audio_path):
            return jsonify({"error": "Failed to process audio from the file"}), 500

        emotion_analysis = pd.DataFrame()
        if mode == "video":
            emotion_analysis = analyze_video_emotions(file_path, models["fer"])

        transcription = speech_to_text_long(audio_path, models["whisper"])
        audio_emotion = predict_emotion(
            audio_path,
            models["emotion_model"],
            models["emotion_feature_extractor"],
            models["emotion_id2label"],
        )

        vocabulary_report = evaluate_vocabulary(transcription, context)
        scores = generate_scores(transcription, audio_emotion, emotion_analysis)
        speech_report = generate_speech_report(transcription, context, audio_emotion)
        if mode == "video":
            expression_report = generate_expression_report(emotion_analysis)
        else:
            expression_report = "No expression analysis for audio-only mode."

        report_data = {
            "userId": user_id,
            "title": title,
            "context": context,
            "transcription": transcription,
            "vocabulary_report": vocabulary_report,
            "speech_report": speech_report,
            "expression_report": expression_report,
            "scores": scores,
            "createdAt": datetime.utcnow(),
        }

        result = reports_collection.insert_one(report_data)
        report_data["_id"] = str(result.inserted_id)

        update_overall_reports(user_id)

        return jsonify(convert_objectid_to_string(report_data)), 200
    finally:
        # Runs on success, on early return and on exceptions, so temporary
        # media never accumulates in the upload folder.
        _remove_if_exists(file_path, audio_path)


@app.route('/chat', methods=['POST'])
def chat():
    """Answer a user message as a coach, using the user's past reports as context.

    Expects a JSON object with ``userId`` and ``message``. Returns
    ``{"response": ...}`` (200), 400 when either field is missing or the body
    is not a JSON object, and 500 on any internal failure.
    """
    try:
        # silent=True: a missing or malformed JSON body is a client error and
        # must yield the 400 below rather than an exception (and a 500).
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        user_id = data.get('userId')
        user_message = data.get('message')

        if not user_id or not user_message:
            return jsonify({"error": "User ID and message are required"}), 400

        user_reports = reports_collection.find({"userId": user_id})

        summary_parts = ["Here is a summary of the user's past performance:\n"]
        for report in user_reports:
            summary_parts.append(
                f"- Session '{report.get('title', 'Untitled')}':\n"
                f" - Vocabulary Score: {_report_score(report, 'vocabulary')}\n"
                f" - Voice Score: {_report_score(report, 'voice')}\n"
                f" - Expressions Score: {_report_score(report, 'expressions')}\n"
                f" - Feedback: {report.get('speech_report', '')}\n\n"
            )
        reports_summary = "".join(summary_parts)

        system_message = f"""
        You are 'Eloquence AI', a friendly and expert public speaking coach.
        Your goal is to help users improve their speaking skills by providing constructive, encouraging, and actionable feedback.

        User's Past Performance Summary:
        {reports_summary}

        Based on this history and the user's current message, provide a helpful and encouraging response.
        Be conversational and supportive.
        """

        chat_completion = groq_client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": user_message},
            ],
            model=GROQ_MODEL,
        )

        ai_response = chat_completion.choices[0].message.content
        return jsonify({"response": ai_response}), 200

    except Exception as e:
        # Top-level boundary of the endpoint: log with traceback, hide details.
        app.logger.exception("Error in /chat endpoint: %s", e)
        return jsonify({"error": "An internal error occurred"}), 500


def update_overall_reports(user_id):
    """Recompute and upsert the aggregate report for *user_id*.

    Averages the three scores over all of the user's reports and regenerates
    the LLM summaries. Does nothing when the user has no reports. Reports
    with missing or malformed scores count as 0 instead of raising.
    """
    user_reports = list(reports_collection.find({"userId": user_id}))
    if not user_reports:
        return

    num_reports = len(user_reports)
    overall_report_data = {
        "userId": user_id,
        "avg_vocabulary": sum(_report_score(r, "vocabulary") for r in user_reports) / num_reports,
        "avg_voice": sum(_report_score(r, "voice") for r in user_reports) / num_reports,
        "avg_expressions": sum(_report_score(r, "expressions") for r in user_reports) / num_reports,
        "overall_reports": generate_overall_reports(user_reports),
    }

    overall_reports_collection.update_one(
        {"userId": user_id},
        {"$set": overall_report_data},
        upsert=True,
    )


@app.route('/user-reports-list', methods=['GET'])
def get_user_reports_list():
    """Return every stored report of the ``userId`` query parameter as a JSON list."""
    user_id = request.args.get('userId')
    if not user_id:
        return jsonify({"error": "User ID is required"}), 400

    user_reports = list(reports_collection.find({"userId": user_id}))
    # An empty result serialises to [] just like the non-empty case.
    return jsonify(convert_objectid_to_string(user_reports)), 200


@app.route('/user-reports', methods=['GET'])
def get_user_reports():
    """Return the aggregate report of the ``userId`` query parameter, or 404."""
    user_id = request.args.get('userId')
    if not user_id:
        return jsonify({"error": "User ID is required"}), 400

    overall_report = overall_reports_collection.find_one({"userId": user_id})
    if not overall_report:
        return jsonify({"error": "No overall report found for the user"}), 404

    return jsonify(convert_objectid_to_string(overall_report)), 200


def generate_report(system_message, user_message):
    """Run one chat completion and return the reply text.

    Never raises: on any API failure the error is logged and a fixed
    fallback sentence is returned instead.
    """
    try:
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": user_message},
            ],
            model=GROQ_MODEL,
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        app.logger.exception("Error generating report: %s", e)
        return "Could not generate report due to an API error."


def generate_overall_reports(user_reports):
    """Return LLM summaries for voice, expressions and vocabulary.

    *user_reports* is the list of stored report documents; it is serialised
    to JSON and embedded in each prompt.
    """
    reports_json = json.dumps(convert_objectid_to_string(user_reports), indent=2)

    voice_report = generate_report(
        "You are an expert in speech analysis...",
        f"Reports: {reports_json}\nProvide a short one paragraph report summarizing the user's overall performance in Voice..."
    )
    expressions_report = generate_report(
        "You are an expert in facial expression analysis...",
        f"Reports: {reports_json}\nProvide a short one paragraph report summarizing the user's overall performance in Facial Expressions..."
    )
    vocabulary_report = generate_report(
        "You are an expert in language and vocabulary analysis...",
        f"Reports: {reports_json}\nProvide a short one paragraph report summarizing the user's overall performance in Vocabulary..."
    )

    return {
        "voice_report": voice_report,
        "expressions_report": expressions_report,
        "vocabulary_report": vocabulary_report,
    }


def _parse_scores(raw):
    """Extract the three numeric scores from an LLM reply.

    Accepts replies that wrap the JSON object in prose or a code fence by
    parsing the outermost ``{...}`` span. Always returns a dict with exactly
    the keys in _SCORE_KEYS; any missing or non-numeric score becomes 0.
    """
    zero_scores = dict.fromkeys(_SCORE_KEYS, 0)
    if not isinstance(raw, str):
        return zero_scores

    start, end = raw.find("{"), raw.rfind("}")
    if start == -1 or end < start:
        return zero_scores

    try:
        parsed = json.loads(raw[start:end + 1])
    except json.JSONDecodeError:
        return zero_scores
    if not isinstance(parsed, dict):
        return zero_scores

    return {
        key: parsed[key] if _is_number(parsed.get(key)) else 0
        for key in _SCORE_KEYS
    }


def generate_scores(transcription, audio_emotion, emotion_analysis):
    """Ask the LLM for vocabulary/voice/expressions scores (out of 100).

    *emotion_analysis* is a DataFrame; an empty one is reported to the model
    as "No facial data". Returns a dict with the three numeric scores, all 0
    when the reply cannot be parsed.
    """
    system_message = """
    You are an expert in speech analysis. Based on the provided data, generate scores (out of 100) for Vocabulary, Voice, and Expressions.
    Provide only the three scores in JSON format, like:
    {"vocabulary": 85, "voice": 78, "expressions": 90}
    """
    emotion_str = emotion_analysis.to_string(index=False) if not emotion_analysis.empty else "No facial data"
    user_message = f"""
    Transcription: {transcription}
    Audio Emotion Data: {audio_emotion}
    Facial Emotion Analysis: {emotion_str}
    Provide only the JSON output with numeric scores.
    """
    report_content = generate_report(system_message, user_message)
    return _parse_scores(report_content)


def generate_speech_report(transcription, context, audio_emotion):
    """Return an LLM paragraph on whether the vocal emotions fit *context*.

    *transcription* is accepted for interface compatibility but is not part
    of the prompt.
    """
    system_message = f"Context: \"{context}\". Evaluate if emotions in audio match. Emotion data: {audio_emotion}."
    user_message = "Provide a short one paragraph report on the emotional appropriateness of speech..."
    return generate_report(system_message, user_message)


def generate_expression_report(emotion_analysis_str):
    """Return an LLM paragraph on the facial-expression data.

    The argument is interpolated into the prompt with ``str()`` formatting.
    """
    system_message = f"Evaluate the following emotion data: {emotion_analysis_str}."
    user_message = "Provide a short one paragraph report on the emotional appropriateness of facial expressions..."
    return generate_report(system_message, user_message)


if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive debugger and must not
    # be used for a publicly reachable deployment.
    app.run(debug=True)