# Voice-To-Text / app.py
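# Gradio app that turns a voice recording into an exam paper: the audio is sent to
# Groq's Whisper transcription endpoint (whisper-large-v3-turbo), long questions,
# short questions, and MCQs are derived from the transcript, and the result is
# returned as a downloadable PDF.
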
import gradio as gr
import requests
from fpdf import FPDF
import nltk
import os
import tempfile
from nltk.tokenize import sent_tokenize
import random
import re

# Attempt to download the NLTK data used for sentence splitting and POS tagging
try:
    nltk.download("punkt")
    nltk.download("averaged_perceptron_tagger")
except Exception:
    print("NLTK data download failed. Falling back to the custom tokenizer and heuristics.")


def custom_sent_tokenize(text):
    # Fallback sentence splitter used when NLTK's punkt data is unavailable
    return text.split(". ")

def transcribe(audio_path):
    # Read the recorded audio file from disk
    with open(audio_path, "rb") as audio_file:
        audio_data = audio_file.read()

    groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    headers = {
        # Read the Groq API key from the environment instead of hard-coding it
        "Authorization": f"Bearer {os.getenv('GROQ_API_KEY', '')}",
    }
    files = {
        'file': ('audio.wav', audio_data, 'audio/wav'),
    }
    data = {
        'model': 'whisper-large-v3-turbo',
        'response_format': 'json',
        'language': 'en',
    }

    response = requests.post(groq_api_endpoint, headers=headers, files=files, data=data)

    if response.status_code == 200:
        result = response.json()
        transcript = result.get("text", "No transcription available.")
        return generate_exam_paper(transcript)
    else:
        error_msg = response.json().get("error", {}).get("message", "Unknown error.")
        print(f"API Error: {error_msg}")
        return create_error_pdf(f"API Error: {error_msg}")

def generate_exam_paper(transcript):
    try:
        sentences = sent_tokenize(transcript)
    except LookupError:
        sentences = custom_sent_tokenize(transcript)

    # Extract important sentences for generating questions
    important_sentences = get_important_sentences(sentences)

    # Generate exam-like questions
    long_questions = [f"Explain the historical significance of '{sentence}'?" for sentence in important_sentences[:5]]
    short_questions = [f"What is the definition of '{sentence.split()[0]}'?" for sentence in important_sentences[:5]]
    mcqs = generate_mcqs(important_sentences)

    pdf_path = create_pdf(transcript, long_questions, short_questions, mcqs)
    return pdf_path

def get_important_sentences(sentences):
    # Focus on sentences that are likely to contain key information (facts or definitions)
    important_sentences = []
    for sentence in sentences:
        try:
            # Heuristic: keep sentences containing nouns or verbs according to NLTK's POS tagger
            pos_tags = [tag for _, tag in nltk.pos_tag(nltk.word_tokenize(sentence))]
            if any(tag.startswith(("NN", "VB")) for tag in pos_tags):
                important_sentences.append(sentence)
        except LookupError:
            # Fallback when tagger data is unavailable: keep reasonably long sentences
            if len(sentence.split()) >= 5:
                important_sentences.append(sentence)
    return important_sentences[:5]  # Limit to the top 5 important sentences

def generate_mcqs(important_sentences):
    mcqs = []
    for sentence in important_sentences:
        # Generate an MCQ from the sentence context
        key_terms = sentence.split()  # Simple tokenization
        if not key_terms:
            continue
        correct_answer = random.choice(key_terms)  # Select a key term as the answer
        # Pick up to three distractors from the remaining terms in the sentence
        distractor_pool = [term for term in key_terms if term != correct_answer]
        distractors = random.sample(distractor_pool, min(3, len(distractor_pool)))
        options = [correct_answer] + distractors
        random.shuffle(options)  # Shuffle the options
        mcq = {
            "question": f"What is '{correct_answer}' in the context of the sentence?",
            "options": options,
            "answer": correct_answer,
        }
        mcqs.append(mcq)
    return mcqs

def create_pdf(transcript, long_questions, short_questions, mcqs):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Exam Paper: Transcription Notes", ln=True, align="C")

    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, f"Transcription:\n{transcript.encode('latin1', 'replace').decode('latin1')}\n\n")

    # Add Long Questions section
    pdf.set_font("Arial", "B", 14)
    pdf.cell(200, 10, "Long Questions", ln=True)
    pdf.set_font("Arial", "", 12)
    for i, question in enumerate(long_questions, 1):
        pdf.multi_cell(0, 10, f"{i}. {question.encode('latin1', 'replace').decode('latin1')}\n")

    # Add Short Questions section
    pdf.set_font("Arial", "B", 14)
    pdf.cell(200, 10, "Short Questions", ln=True)
    pdf.set_font("Arial", "", 12)
    for i, question in enumerate(short_questions, 1):
        pdf.multi_cell(0, 10, f"{i}. {question.encode('latin1', 'replace').decode('latin1')}\n")

    # Add MCQs section
    pdf.set_font("Arial", "B", 14)
    pdf.cell(200, 10, "Multiple Choice Questions (MCQs)", ln=True)
    pdf.set_font("Arial", "", 12)
    for i, mcq in enumerate(mcqs, 1):
        pdf.multi_cell(0, 10, f"{i}. {mcq['question'].encode('latin1', 'replace').decode('latin1')}")
        for option in mcq["options"]:
            pdf.multi_cell(0, 10, f"   - {option.encode('latin1', 'replace').decode('latin1')}")
        pdf.multi_cell(0, 10, f"Answer: {mcq['answer'].encode('latin1', 'replace').decode('latin1')}\n")

    # Save the generated PDF to a temporary file
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        pdf.output(temp_pdf.name)
        pdf_path = temp_pdf.name

    return pdf_path

def create_error_pdf(message):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Error Report", ln=True, align="C")
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, message.encode('latin1', 'replace').decode('latin1'))

    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        pdf.output(temp_pdf.name)
        error_pdf_path = temp_pdf.name

    return error_pdf_path

iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.File(label="Download Exam Paper (PDF)"),
    title="Voice to Text Converter and Exam Paper Generator",
)
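
# Set the GROQ_API_KEY environment variable before launching so that transcribe()
# can authenticate with the Groq transcription endpoint.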
iface.launch()