Spaces:

Aurum79
/

Eloquence-Backend

Runtime error

File size: 11,190 Bytes

import json
import os
import uuid
from datetime import datetime

import pandas as pd
import pymongo
from bson import ObjectId
from dotenv import load_dotenv
from flask import Flask, request, jsonify
from flask_cors import CORS
from groq import Groq
from werkzeug.utils import secure_filename

from routes.auth_routes import auth_bp
from utils.audioextraction import extract_audio
from utils.expressions import analyze_video_emotions
from utils.models import load_models
from utils.transcription import speech_to_text_long
from utils.vocabulary import evaluate_vocabulary
from utils.vocals import predict_emotion

# Load variables from a local .env file (if any) into the environment before
# the os.environ lookups below are evaluated.
load_dotenv()
app = Flask(__name__)
# Apply flask_cors to the whole application with its default settings.
CORS(app)

# Load models on startup
# The returned mapping is indexed by the request handlers with the keys
# "fer", "whisper", "emotion_model", "emotion_feature_extractor" and
# "emotion_id2label".
models = load_models()

# MongoDB connection
# The connection string is taken from the MONGO_URI environment variable when
# it is set, so credentials can live outside the source tree.
# NOTE(review): the fallback literal keeps existing deployments working but
# embeds credentials in source control — rotate them and drop the fallback.
client = pymongo.MongoClient(
    os.environ.get(
        "MONGO_URI",
        "mongodb+srv://pmsankheb23:[email protected]/",
    )
)
db = client["Eloquence"]
collections_user = db["user"]
reports_collection = db["reports"]  # one document per analysed session
overall_reports_collection = db["overall_reports"]  # one aggregate document per user

# Groq client setup
# The API key is read from the GROQ_API_KEY environment variable.
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Configure upload folder
UPLOAD_FOLDER = 'uploads'
# File extensions (lower-case, without the dot) accepted by allowed_file().
ALLOWED_EXTENSIONS = {'mp4', 'webm', 'wav'}
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# exist_ok=True makes creation race-free when several workers start at once
# and fails loudly if the path exists but is not a directory.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

def allowed_file(filename):
    """Return True when *filename* ends in an extension from ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    A name without any dot is rejected.
    """
    _stem, dot, extension = filename.rpartition('.')
    # rpartition yields an empty separator when the name contains no dot.
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS

def convert_objectid_to_string(data):
    """Return a copy of *data* with ObjectId and datetime values made JSON-friendly.

    Dicts and lists are walked recursively. Every ``ObjectId`` becomes its
    ``str()`` form and every ``datetime`` becomes its ``isoformat()`` string,
    wherever it appears (dict value, list item, or the top-level argument).
    Dict keys and all other values are returned unchanged.

    Args:
        data: Any value; typically a MongoDB document or a list of documents.

    Returns:
        A new dict/list for container inputs, a string for ObjectId/datetime
        inputs, otherwise *data* itself.
    """
    if isinstance(data, dict):
        return {key: convert_objectid_to_string(value) for key, value in data.items()}
    if isinstance(data, list):
        return [convert_objectid_to_string(item) for item in data]
    # Handled at every nesting level, not only for direct dict values, so a
    # datetime inside a list no longer slips through unconverted.
    if isinstance(data, datetime):
        return data.isoformat()
    if isinstance(data, ObjectId):
        return str(data)
    return data

# Attach the blueprint imported from routes.auth_routes to the application.
app.register_blueprint(auth_bp)

@app.route('/')
def home():
    """Serve a fixed plain-text greeting at the root URL."""
    greeting = "Hello World"
    return greeting

def _remove_quietly(path):
    """Delete *path* as best-effort cleanup; a missing file is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass  # never created, or already removed
    except OSError as exc:
        # Cleanup must not mask the response (or exception) of the request.
        print(f"Could not remove temporary file {path}: {exc}")


@app.route('/upload', methods=['POST'])
def upload_file():
    """Analyse an uploaded recording and store the resulting report.

    Expects a multipart form with:
        file:    the recording (extension must pass ``allowed_file``).
        userId:  required owner of the report.
        context: optional free text, defaults to ''.
        title:   optional session title, defaults to 'Untitled Session'.
        mode:    'video' (default) also runs facial-expression analysis;
                 any other value skips it.

    The recording and its extracted audio are written to per-request unique
    paths inside the upload folder and are always removed before returning,
    including when a processing step raises.

    Returns:
        200 with the stored report as JSON, 400 for a missing/invalid field
        or disallowed file type, 500 when audio extraction fails.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400
    upload = request.files['file']
    context = request.form.get('context', '')
    title = request.form.get('title', 'Untitled Session')
    mode = request.form.get('mode', 'video')
    user_id = request.form.get('userId')

    if not user_id:
        return jsonify({"error": "User ID is required"}), 400

    if upload.filename == '':
        return jsonify({"error": "No selected file"}), 400

    if not (upload and allowed_file(upload.filename)):
        return jsonify({"error": "File type not allowed"}), 400

    # Build both temp paths from a random token plus the already-validated
    # extension: concurrent requests can no longer overwrite each other's
    # files, and an upload named "output.wav" can no longer alias the
    # extracted-audio path.
    extension = upload.filename.rsplit('.', 1)[1].lower()
    token = uuid.uuid4().hex
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], f"{token}.{extension}")
    audio_path = os.path.join(app.config['UPLOAD_FOLDER'], f"{token}_audio.wav")

    try:
        upload.save(file_path)

        if not extract_audio(file_path, audio_path):
            return jsonify({"error": "Failed to process audio from the file"}), 500

        # Stays an empty frame for audio-only sessions.
        emotion_analysis = pd.DataFrame()
        if mode == "video":
            emotion_analysis = analyze_video_emotions(file_path, models["fer"])

        transcription = speech_to_text_long(audio_path, models["whisper"])
        audio_emotion = predict_emotion(
            audio_path,
            models["emotion_model"],
            models["emotion_feature_extractor"],
            models["emotion_id2label"],
        )
        vocabulary_report = evaluate_vocabulary(transcription, context)
        scores = generate_scores(transcription, audio_emotion, emotion_analysis)
        speech_report = generate_speech_report(transcription, context, audio_emotion)
        if mode == "video":
            expression_report = generate_expression_report(emotion_analysis)
        else:
            expression_report = "No expression analysis for audio-only mode."

        report_data = {
            "userId": user_id,
            "title": title,
            "context": context,
            "transcription": transcription,
            "vocabulary_report": vocabulary_report,
            "speech_report": speech_report,
            "expression_report": expression_report,
            "scores": scores,
            "createdAt": datetime.utcnow()
        }

        result = reports_collection.insert_one(report_data)
        report_data["_id"] = str(result.inserted_id)
        # Refresh the per-user aggregate now that a new report exists.
        update_overall_reports(user_id)

        return jsonify(convert_objectid_to_string(report_data)), 200
    finally:
        # Runs on success, on the early 500 above, and on any exception.
        _remove_quietly(file_path)
        _remove_quietly(audio_path)

@app.route('/chat', methods=['POST'])
def chat():
    """Answer a coaching question using the user's stored reports as context.

    Expects a JSON object with ``userId`` and ``message``. All reports stored
    for the user are summarised into the system prompt and sent to the Groq
    chat completion API together with the message.

    Returns:
        200 with ``{"response": <model text>}``; 400 when the body is not a
        JSON object or a required field is missing/empty; 500 on any other
        failure (the error is printed, not returned to the client).
    """
    try:
        # silent=True returns None instead of raising on a missing or
        # malformed JSON body, so bad input gets the 400 below, not a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        user_id = data.get('userId')
        user_message = data.get('message')

        if not user_id or not user_message:
            return jsonify({"error": "User ID and message are required"}), 400

        summary_parts = ["Here is a summary of the user's past performance:\n"]
        for report in reports_collection.find({"userId": user_id}):
            # Scores come from parsed model output, so tolerate a missing or
            # malformed entry instead of failing the whole request.
            scores = report.get('scores')
            if not isinstance(scores, dict):
                scores = {}
            summary_parts.append(
                f"- Session '{report.get('title', 'Untitled')}':\n"
                f"  - Vocabulary Score: {scores.get('vocabulary', 'N/A')}\n"
                f"  - Voice Score: {scores.get('voice', 'N/A')}\n"
                f"  - Expressions Score: {scores.get('expressions', 'N/A')}\n"
                f"  - Feedback: {report.get('speech_report', '')}\n\n"
            )
        reports_summary = "".join(summary_parts)

        system_message = f"""
        You are 'Eloquence AI', a friendly and expert public speaking coach. Your goal is to help users improve their speaking skills by providing constructive, encouraging, and actionable feedback.

        User's Past Performance Summary:
        {reports_summary}

        Based on this history and the user's current message, provide a helpful and encouraging response. Be conversational and supportive.
        """

        chat_completion = groq_client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": user_message},
            ],
            model="llama3-70b-8192",
        )

        ai_response = chat_completion.choices[0].message.content
        return jsonify({"response": ai_response}), 200

    except Exception as e:
        # Request boundary: report the failure server-side, keep details
        # out of the client response.
        print(f"Error in /chat endpoint: {e}")
        return jsonify({"error": "An internal error occurred"}), 500

def _mean_score(reports, key):
    """Average ``report["scores"][key]`` over *reports*, counting bad values as 0."""
    total = 0
    for report in reports:
        scores = report.get("scores")
        value = scores.get(key) if isinstance(scores, dict) else None
        # bool is a subclass of int; do not let True/False count as a score.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            total += value
    return total / len(reports)


def update_overall_reports(user_id):
    """Recompute and upsert the aggregate report for *user_id*.

    Reads every document in ``reports_collection`` for the user, averages the
    vocabulary, voice and expressions scores, generates the narrative
    summaries via ``generate_overall_reports`` and writes the result to
    ``overall_reports_collection`` (inserting when no document exists yet).
    Does nothing when the user has no reports.

    A report whose score is missing or not a number contributes 0 to the
    corresponding average instead of raising.

    Args:
        user_id: Value matched against the ``userId`` field of stored reports.
    """
    user_reports = list(reports_collection.find({"userId": user_id}))
    if not user_reports:
        return

    overall_report_data = {
        "userId": user_id,
        "avg_vocabulary": _mean_score(user_reports, "vocabulary"),
        "avg_voice": _mean_score(user_reports, "voice"),
        "avg_expressions": _mean_score(user_reports, "expressions"),
        "overall_reports": generate_overall_reports(user_reports)
    }

    overall_reports_collection.update_one(
        {"userId": user_id},
        {"$set": overall_report_data},
        upsert=True
    )

@app.route('/user-reports-list', methods=['GET'])
def get_user_reports_list():
    """Return every stored report for the ``userId`` query parameter.

    Responds with 400 when ``userId`` is absent or empty, otherwise 200 with
    a JSON array (empty when the user has no reports).
    """
    requested_user = request.args.get('userId')
    if requested_user:
        documents = list(reports_collection.find({"userId": requested_user}))
        payload = convert_objectid_to_string(documents) if documents else []
        return jsonify(payload), 200
    return jsonify({"error": "User ID is required"}), 400

@app.route('/user-reports', methods=['GET'])
def get_user_reports():
    """Return the aggregate report for the ``userId`` query parameter.

    Responds with 400 when ``userId`` is absent or empty, 404 when no
    aggregate document exists for the user, otherwise 200 with the document.
    """
    requested_user = request.args.get('userId')
    if not requested_user:
        return jsonify({"error": "User ID is required"}), 400

    aggregate = overall_reports_collection.find_one({"userId": requested_user})
    if aggregate:
        return jsonify(convert_objectid_to_string(aggregate)), 200
    return jsonify({"error": "No overall report found for the user"}), 404

def generate_report(system_message, user_message):
    """Send a system/user message pair to the Groq chat API and return the reply text.

    Any exception raised by the call is printed and replaced by a fixed
    fallback sentence, so this function does not raise for API failures.

    Args:
        system_message: Content of the "system" role message.
        user_message: Content of the "user" role message.

    Returns:
        The content of the first returned choice, or the fallback sentence.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
    ]
    try:
        completion = groq_client.chat.completions.create(
            messages=conversation,
            model="llama3-70b-8192",
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as e:
        print(f"Error generating report: {e}")
        return "Could not generate report due to an API error."

def generate_overall_reports(user_reports):
    """Produce one-paragraph voice, expression and vocabulary summaries.

    The reports are serialised to indented JSON once and embedded in three
    prompts, each sent through ``generate_report``.

    Args:
        user_reports: The user's report documents.

    Returns:
        A dict with the keys ``voice_report``, ``expressions_report`` and
        ``vocabulary_report`` holding the generated texts.
    """
    reports_json = json.dumps(convert_objectid_to_string(user_reports), indent=2)

    # (result key, system prompt, area named in the user prompt), listed in
    # the order the requests are issued.
    sections = (
        ("voice_report", "You are an expert in speech analysis...", "Voice"),
        ("expressions_report", "You are an expert in facial expression analysis...", "Facial Expressions"),
        ("vocabulary_report", "You are an expert in language and vocabulary analysis...", "Vocabulary"),
    )
    return {
        result_key: generate_report(
            system_prompt,
            f"Reports: {reports_json}\nProvide a short one paragraph report summarizing the user's overall performance in {area}...",
        )
        for result_key, system_prompt, area in sections
    }

def generate_scores(transcription, audio_emotion, emotion_analysis):
    """Ask the model for vocabulary, voice and expressions scores.

    The model reply is expected to contain a JSON object. Text around the
    object (for example a code fence or an introductory sentence) is
    ignored. The result always has exactly the keys ``vocabulary``,
    ``voice`` and ``expressions``; a key that is missing or not a number is
    reported as 0, and an unparseable reply yields all zeros.

    Args:
        transcription: Transcribed speech, embedded in the prompt.
        audio_emotion: Audio emotion data, embedded in the prompt as text.
        emotion_analysis: DataFrame of facial emotion data; an empty frame is
            rendered as "No facial data".

    Returns:
        dict mapping the three score names to numbers.
    """
    system_message = """
    You are an expert in speech analysis. Based on the provided data, generate scores (out of 100) for Vocabulary, Voice, and Expressions.
    Provide only the three scores in JSON format, like:
    {"vocabulary": 85, "voice": 78, "expressions": 90}
    """
    emotion_str = emotion_analysis.to_string(index=False) if not emotion_analysis.empty else "No facial data"
    user_message = f"""
    Transcription: {transcription}
    Audio Emotion Data: {audio_emotion}
    Facial Emotion Analysis: {emotion_str}
    Provide only the JSON output with numeric scores.
    """
    report_content = generate_report(system_message, user_message)

    score_names = ("vocabulary", "voice", "expressions")
    parsed = {}
    if isinstance(report_content, str):
        # Take the outermost {...} span so a reply wrapped in prose or a
        # markdown fence is still parsed.
        start = report_content.find("{")
        end = report_content.rfind("}")
        if start != -1 and end > start:
            try:
                candidate = json.loads(report_content[start:end + 1])
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict):
                parsed = candidate

    scores = {}
    for name in score_names:
        value = parsed.get(name)
        # Callers index and average these three keys, so guarantee that each
        # is present and numeric (bool is excluded although it is an int).
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        scores[name] = value if is_number else 0
    return scores

def generate_speech_report(transcription, context, audio_emotion):
    """Request a one-paragraph report on whether the audio emotions fit the context.

    Args:
        transcription: Accepted for interface compatibility; it is not
            included in either prompt.
        context: Speaking context, quoted in the system prompt.
        audio_emotion: Audio emotion data, embedded in the system prompt.

    Returns:
        The text returned by ``generate_report``.
    """
    instruction = "Provide a short one paragraph report on the emotional appropriateness of speech..."
    framing = f'Context: "{context}". Evaluate if emotions in audio match. Emotion data: {audio_emotion}.'
    return generate_report(framing, instruction)

def generate_expression_report(emotion_analysis_str):
    """Request a one-paragraph report on the facial-expression data.

    Args:
        emotion_analysis_str: The emotion data, either already formatted as
            text or as a pandas DataFrame. A DataFrame is rendered in full
            with ``to_string(index=False)`` (its default string form is
            abbreviated for large frames), and an empty DataFrame is
            described as "No facial data", matching ``generate_scores``.
            Any other value is embedded as-is.

    Returns:
        The text returned by ``generate_report``.
    """
    if isinstance(emotion_analysis_str, pd.DataFrame):
        if emotion_analysis_str.empty:
            emotion_text = "No facial data"
        else:
            emotion_text = emotion_analysis_str.to_string(index=False)
    else:
        emotion_text = emotion_analysis_str

    system_message = f"Evaluate the following emotion data: {emotion_text}."
    user_message = "Provide a short one paragraph report on the emotional appropriateness of facial expressions..."
    return generate_report(system_message, user_message)

if __name__ == '__main__':
    # Debug mode enables Werkzeug's interactive debugger, which lets anyone
    # who can reach the server execute code, so it is opt-in through the
    # FLASK_DEBUG environment variable rather than always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled)