import os
import random
import re
import tempfile

import gradio as gr
import nltk
import requests
from fpdf import FPDF
from nltk.tokenize import sent_tokenize

# Attempt to download the punkt tokenizer; if unavailable we fall back to
# custom_sent_tokenize() at call time (see generate_exam_paper).
try:
    nltk.download("punkt", quiet=True)
except Exception:  # narrow enough: any download failure just means "use fallback"
    print("NLTK punkt tokenizer download failed. Using custom tokenizer.")


def custom_sent_tokenize(text):
    """Naive fallback sentence splitter used when NLTK punkt data is missing."""
    return text.split(". ")


def _latin1(text):
    """Coerce text to latin-1 (FPDF's core-font charset), replacing unmappable chars."""
    return text.encode("latin1", "replace").decode("latin1")


def transcribe(audio_path):
    """Transcribe an audio file via the Groq Whisper API and build an exam PDF.

    Args:
        audio_path: Filesystem path to the recorded audio (from Gradio).

    Returns:
        Path to a generated PDF: the exam paper on success, or an error
        report PDF if the API call fails.
    """
    with open(audio_path, "rb") as audio_file:
        audio_data = audio_file.read()

    groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    headers = {
        # SECURITY: never hard-code API keys in source. Set GROQ_API_KEY in
        # the environment instead.
        "Authorization": f"Bearer {os.environ.get('GROQ_API_KEY', '')}",
    }
    files = {
        'file': ('audio.wav', audio_data, 'audio/wav'),
    }
    data = {
        'model': 'whisper-large-v3-turbo',
        'response_format': 'json',
        'language': 'en',
    }

    response = requests.post(groq_api_endpoint, headers=headers, files=files, data=data)

    if response.status_code == 200:
        result = response.json()
        transcript = result.get("text", "No transcription available.")
        return generate_exam_paper(transcript)

    # The error body is not guaranteed to be JSON — degrade gracefully.
    try:
        error_msg = response.json().get("error", {}).get("message", "Unknown error.")
    except ValueError:
        error_msg = response.text or "Unknown error."
    print(f"API Error: {error_msg}")
    return create_error_pdf(f"API Error: {error_msg}")


def generate_exam_paper(transcript):
    """Build an exam paper PDF (2 long, 5 short, 7 MCQ questions) from a transcript.

    Returns the filesystem path of the generated PDF.
    """
    try:
        sentences = sent_tokenize(transcript)
    except LookupError:
        # punkt data was never downloaded — use the naive splitter.
        sentences = custom_sent_tokenize(transcript)

    # Extract sentences likely to contain key information.
    important_sentences = get_important_sentences(sentences)

    # Generate exam-like questions, capped at 2 long / 5 short / 7 MCQs.
    long_questions = generate_long_questions(important_sentences)[:2]
    short_questions = generate_short_questions(important_sentences)[:5]
    mcqs = generate_mcqs(important_sentences)[:7]

    return create_pdf(transcript, long_questions, short_questions, mcqs)


def get_important_sentences(sentences):
    """Select sentences likely to carry key information.

    BUG FIX: the original matched the literal tokens "NN"/"VB" in the raw
    text — those are POS *tags*, which never appear in plain sentences, so
    virtually no sentence was ever selected and the exam came out empty.
    Use a simple heuristic instead: keep sentences with at least four words.
    """
    return [s for s in sentences if len(re.findall(r'\w+', s)) >= 4]


def generate_long_questions(important_sentences):
    """Create up to 2 essay-style questions from the selected sentences."""
    return [
        f"Explain the historical significance of '{sentence}'?"
        for sentence in important_sentences[:2]
    ]


def generate_short_questions(important_sentences):
    """Create up to 5 definition questions from the first word of each sentence."""
    short_questions = []
    for sentence in important_sentences[:5]:
        words = sentence.split()
        if not words:  # guard: empty/whitespace sentence would raise IndexError
            continue
        short_questions.append(f"What is the definition of '{words[0]}'?")
    return short_questions


def generate_mcqs(important_sentences):
    """Create up to 7 multiple-choice questions, one per selected sentence.

    Each MCQ is a dict with "question", "options" (shuffled), and "answer".
    """
    mcqs = []
    for sentence in important_sentences[:7]:
        # Unique terms, order preserved — duplicates would repeat options.
        key_terms = list(dict.fromkeys(sentence.split()))
        if not key_terms:
            continue
        correct_answer = random.choice(key_terms)
        # BUG FIX: the original sampled 3 distractors unconditionally, which
        # raised ValueError for short sentences and could duplicate the
        # correct answer. Sample only from the remaining distinct terms.
        distractor_pool = [t for t in key_terms if t != correct_answer]
        distractors = random.sample(distractor_pool, min(3, len(distractor_pool)))
        options = [correct_answer] + distractors
        random.shuffle(options)
        mcqs.append({
            "question": f"What is '{correct_answer}' in the context of the sentence?",
            "options": options,
            "answer": correct_answer,
        })
    return mcqs


def create_pdf(transcript, long_questions, short_questions, mcqs):
    """Render the transcript and the three question sections to a temp PDF.

    Returns the path of the saved PDF file.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Exam Paper: Transcription Notes", ln=True, align="C")
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, f"Transcription:\n{_latin1(transcript)}\n\n")

    # Long Questions section
    pdf.set_font("Arial", "B", 14)
    pdf.cell(200, 10, "Long Questions", ln=True)
    pdf.set_font("Arial", "", 12)
    for i, question in enumerate(long_questions, 1):
        pdf.multi_cell(0, 10, f"{i}. {_latin1(question)}\n")

    # Short Questions section
    pdf.set_font("Arial", "B", 14)
    pdf.cell(200, 10, "Short Questions", ln=True)
    pdf.set_font("Arial", "", 12)
    for i, question in enumerate(short_questions, 1):
        pdf.multi_cell(0, 10, f"{i}. {_latin1(question)}\n")

    # MCQs section
    pdf.set_font("Arial", "B", 14)
    pdf.cell(200, 10, "Multiple Choice Questions (MCQs)", ln=True)
    pdf.set_font("Arial", "", 12)
    for i, mcq in enumerate(mcqs, 1):
        pdf.multi_cell(0, 10, f"{i}. {_latin1(mcq['question'])}")
        for option in mcq["options"]:
            pdf.multi_cell(0, 10, f"   - {_latin1(option)}")
        pdf.multi_cell(0, 10, f"Answer: {_latin1(mcq['answer'])}\n")

    # Persist to a temporary file so Gradio can serve it for download.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        pdf.output(temp_pdf.name)
        return temp_pdf.name


def create_error_pdf(message):
    """Render an error message to a temp PDF and return its path."""
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Error Report", ln=True, align="C")
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, _latin1(message))

    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        pdf.output(temp_pdf.name)
        return temp_pdf.name


iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.File(label="Download Exam Paper (PDF)"),
    title="Voice to Text Converter and Exam Paper Generator",
)

# Guard the launch so importing this module (e.g. for testing) does not
# start the web server.
if __name__ == "__main__":
    iface.launch()