"""Gradio app: transcribe audio with the Groq API and build a PDF of notes.

The uploaded/recorded audio is sent to Groq's Whisper transcription endpoint,
the transcript is split into sentences, simple questions are derived from the
first few sentences, and everything is written to a downloadable PDF.

The Groq API key is read from the ``GROQ_API_KEY`` environment variable; it
must never be committed to source control.
"""

import os
import random
import tempfile

import gradio as gr
import nltk
import requests
from fpdf import FPDF
from nltk.tokenize import sent_tokenize

# Ensure nltk resources are downloaded.
# NOTE: newer NLTK releases load the sentence tokenizer data from "punkt_tab"
# rather than "punkt"; requesting both keeps sent_tokenize working on either.
nltk.download("punkt")
nltk.download("punkt_tab")

# Groq API endpoint for audio transcription.
GROQ_API_ENDPOINT = "https://api.groq.com/openai/v1/audio/transcriptions"
# Name of the environment variable that holds the Groq API key.
GROQ_API_KEY_ENV = "GROQ_API_KEY"
# Upper bound (seconds) on the transcription request so the UI cannot hang
# forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 120
# Number of leading transcript sentences used to derive questions.
MAX_QUESTIONS = 5


def _extract_error_message(response):
    """Return a readable error message from a failed Groq API response."""
    try:
        payload = response.json()
    except ValueError:
        # Error bodies are not guaranteed to be JSON (e.g. proxy HTML pages).
        return response.text or "Unknown error."
    error = payload.get("error") if isinstance(payload, dict) else None
    if isinstance(error, dict):
        return error.get("message", "Unknown error.")
    return str(error) if error else "Unknown error."


def transcribe(audio_path):
    """Send an audio file to the Groq API and turn the transcript into a PDF.

    Args:
        audio_path: Filesystem path of the audio file to transcribe.

    Returns:
        The path of the generated PDF, or ``None`` when no audio was given,
        the API key is missing, the request fails, or the API reports an
        error (the reason is printed).
    """
    if not audio_path:
        print("API Error: No audio file was provided.")
        return None

    api_key = os.environ.get(GROQ_API_KEY_ENV)
    if not api_key:
        print(f"API Error: The {GROQ_API_KEY_ENV} environment variable is not set.")
        return None

    headers = {"Authorization": f"Bearer {api_key}"}
    data = {
        "model": "whisper-large-v3-turbo",
        "response_format": "json",
        "language": "en",
    }

    try:
        # Read audio file in binary mode.
        with open(audio_path, "rb") as audio_file:
            audio_data = audio_file.read()
        files = {"file": ("audio.wav", audio_data, "audio/wav")}
        # Send audio to Groq API.
        response = requests.post(
            GROQ_API_ENDPOINT,
            headers=headers,
            files=files,
            data=data,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except (OSError, requests.RequestException) as exc:
        print(f"API Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"API Error: {_extract_error_message(response)}")
        return None  # Indicate failure

    try:
        result = response.json()
    except ValueError:
        print("API Error: The transcription response was not valid JSON.")
        return None

    transcript = "No transcription available."
    if isinstance(result, dict):
        transcript = result.get("text", transcript)
    return generate_notes(transcript)


def _build_mcq(term):
    """Build one multiple-choice question whose correct answer is *term*."""
    options = [term, "Option 1", "Option 2", "Option 3"]
    # Shuffle every option so the correct answer is not always listed first.
    random.shuffle(options)
    return {
        "question": f"What is '{term}'?",
        "options": options,
        "answer": term,
    }


def generate_notes(transcript):
    """Derive questions from a transcript and write them to a PDF.

    Long questions, short questions and MCQs are generated from the first
    ``MAX_QUESTIONS`` sentences of the transcript that contain a word.

    Args:
        transcript: The transcribed text.

    Returns:
        The path of the generated PDF.
    """
    # Split transcript into sentences; skip any without a word so taking the
    # first word below cannot fail.
    sentences = [s for s in sent_tokenize(transcript) if s.split()]
    sentences = sentences[:MAX_QUESTIONS]
    first_words = [sentence.split()[0] for sentence in sentences]

    long_questions = [f"What is meant by '{sentence}'?" for sentence in sentences]
    short_questions = [f"Define '{word}'." for word in first_words]
    mcqs = [_build_mcq(word) for word in first_words]

    return create_pdf(transcript, long_questions, short_questions, mcqs)


def _latin1(text):
    """Replace characters the PDF core fonts cannot encode (non latin-1)."""
    return str(text).encode("latin-1", "replace").decode("latin-1")


def _write_heading(pdf, title):
    """Write a bold section heading and switch back to the body font."""
    pdf.set_font("Arial", "B", 14)
    pdf.cell(200, 10, _latin1(title), ln=True)
    pdf.set_font("Arial", "", 12)


def create_pdf(transcript, long_questions, short_questions, mcqs):
    """Create the notes PDF and save it to a unique temporary file.

    Args:
        transcript: The transcribed text.
        long_questions: Iterable of long-question strings.
        short_questions: Iterable of short-question strings.
        mcqs: Iterable of dicts with ``question``, ``options`` and ``answer``.

    Returns:
        The path of the saved PDF file.
    """
    pdf = FPDF()
    pdf.add_page()

    # Title
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Transcription Notes", ln=True, align="C")

    # Transcription
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, _latin1(f"Transcription:\n{transcript}\n\n"))

    # Long Questions
    _write_heading(pdf, "Long Questions")
    for question in long_questions:
        pdf.multi_cell(0, 10, _latin1(f"- {question}\n"))

    # Short Questions
    _write_heading(pdf, "Short Questions")
    for question in short_questions:
        pdf.multi_cell(0, 10, _latin1(f"- {question}\n"))

    # MCQs
    _write_heading(pdf, "Multiple Choice Questions (MCQs)")
    for mcq in mcqs:
        pdf.multi_cell(0, 10, _latin1(f"Q: {mcq['question']}"))
        for option in mcq["options"]:
            pdf.multi_cell(0, 10, _latin1(f" - {option}"))
        pdf.multi_cell(0, 10, _latin1(f"Answer: {mcq['answer']}\n"))

    # Save PDF to a unique file in the system temp directory: a fixed path
    # may not exist on the host and would be overwritten by concurrent
    # requests.
    fd, pdf_path = tempfile.mkstemp(prefix="transcription_notes_", suffix=".pdf")
    os.close(fd)
    pdf.output(pdf_path)
    return pdf_path


# Gradio interface
def gradio_interface(audio_path):
    """Gradio callback: return the notes PDF for the submitted audio.

    Args:
        audio_path: Filesystem path of the submitted audio (the input
            component is configured with ``type="filepath"``).

    Returns:
        The path of the generated PDF.

    Raises:
        gr.Error: If the audio could not be processed. The output component
            is a file, so a plain error string would be treated as a path.
    """
    pdf_path = transcribe(audio_path)
    if not pdf_path:
        raise gr.Error(
            "Error: Unable to process the audio file. "
            "Please check the API key and try again."
        )
    return pdf_path


iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.File(label="Download PDF with Notes and Questions"),
    title="Voice to Text Converter and Notes Generator",
)

iface.launch()