# Voice-To-Text / app.py
# Web-page header captured along with the file (not code): author
# lodhrangpt, commit 1bcb7e9 "Update app.py" (verified), 3.97 kB.
import os
import random
import tempfile

import gradio as gr
import nltk
import requests
from fpdf import FPDF
from nltk.tokenize import sent_tokenize
# Ensure nltk resources are downloaded.
# This runs at import time; presumably the "punkt" data is what
# sent_tokenize (used in generate_notes) needs -- confirm against the
# installed NLTK version.
nltk.download("punkt")
# Function to send audio to Groq API and get transcription
def transcribe(audio_path):
    """Transcribe an audio file with the Groq API and build the notes PDF.

    Args:
        audio_path: Filesystem path of the recording to transcribe.

    Returns:
        Whatever ``generate_notes`` returns for the transcript (the path of
        the generated PDF), or ``None`` when no audio was supplied, no API
        key is configured, the file cannot be read, or the request fails.
    """
    if not audio_path:
        print("API Error: no audio file was provided.")
        return None

    # Read the key from the environment so no secret lives in the source.
    # NOTE(review): the key that used to be hard-coded here was published
    # together with this file and should be revoked.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        print("API Error: the GROQ_API_KEY environment variable is not set.")
        return None

    # Read audio file in binary mode
    try:
        with open(audio_path, "rb") as audio_file:
            audio_data = audio_file.read()
    except OSError as exc:
        print(f"API Error: could not read audio file: {exc}")
        return None

    # Groq API endpoint for audio transcription
    groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    headers = {
        "Authorization": f"Bearer {api_key}",
    }
    files = {
        "file": ("audio.wav", audio_data, "audio/wav"),
    }
    data = {
        "model": "whisper-large-v3-turbo",
        "response_format": "json",
        "language": "en",
    }

    # Send audio to Groq API. Without a timeout a stalled connection would
    # block the request handler indefinitely.
    try:
        response = requests.post(
            groq_api_endpoint,
            headers=headers,
            files=files,
            data=data,
            timeout=120,
        )
    except requests.RequestException as exc:
        print(f"API Error: {exc}")
        return None

    # The body is not guaranteed to be JSON (e.g. a proxy error page), so a
    # decoding failure must not crash the error reporting below.
    try:
        payload = response.json()
    except ValueError:
        payload = {}
    if not isinstance(payload, dict):
        payload = {}

    if response.status_code == 200:
        transcript = payload.get("text", "No transcription available.")
        return generate_notes(transcript)

    error = payload.get("error", {})
    if isinstance(error, dict):
        error_msg = error.get("message", "Unknown error.")
    else:
        error_msg = str(error) or "Unknown error."
    print(f"API Error: {error_msg}")
    return None  # Indicate failure
# Function to generate notes and questions
def generate_notes(transcript):
    """Build study questions from a transcript and write them to a PDF.

    Each of the first five usable sentences yields one long question, one
    short question (about the sentence's first word) and one multiple-choice
    question whose correct answer is that first word.

    Args:
        transcript: The transcribed text.

    Returns:
        The value returned by ``create_pdf`` (the path of the written PDF).
    """
    # Split transcript into sentences; skip any sentence without a word so
    # taking its first word below cannot raise IndexError.
    sentences = [s for s in sent_tokenize(transcript) if s.split()][:5]
    first_words = [sentence.split()[0] for sentence in sentences]

    # Generate long and short questions
    long_questions = [f"What is meant by '{sentence}'?" for sentence in sentences]
    short_questions = [f"Define '{word}'." for word in first_words]

    # Generate MCQs
    mcqs = []
    for word in first_words:
        options = [word, "Option 1", "Option 2", "Option 3"]
        # Shuffle all four options; previously only the distractors were
        # shuffled, which left the correct answer always in first position.
        random.shuffle(options)
        mcqs.append(
            {
                "question": f"What is '{word}'?",
                "options": options,
                "answer": word,
            }
        )

    # Create PDF
    return create_pdf(transcript, long_questions, short_questions, mcqs)
# Function to create and save PDF
def create_pdf(transcript, long_questions, short_questions, mcqs):
    """Render the transcript and the generated questions into a PDF file.

    Args:
        transcript: Full transcription text.
        long_questions: Iterable of long-question strings.
        short_questions: Iterable of short-question strings.
        mcqs: Iterable of dicts with the keys ``"question"``, ``"options"``
            (an iterable of strings) and ``"answer"``.

    Returns:
        Path of the written PDF. Every call writes a new file in the system
        temporary directory, so concurrent requests do not overwrite each
        other and no pre-existing output directory is required.
    """
    pdf = FPDF()
    pdf.add_page()

    def safe(text):
        # The built-in "Arial" font is expected to handle Latin-1 only;
        # replace anything else (e.g. curly quotes) with "?" instead of
        # letting the PDF writer raise an encoding error.
        return str(text).encode("latin-1", "replace").decode("latin-1")

    def heading(title):
        # Bold section heading, then switch back to the body font.
        pdf.set_font("Arial", "B", 14)
        pdf.cell(200, 10, title, ln=True)
        pdf.set_font("Arial", "", 12)

    def bullet_section(title, questions):
        heading(title)
        for question in questions:
            pdf.multi_cell(0, 10, safe(f"- {question}\n"))

    # Title
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Transcription Notes", ln=True, align="C")

    # Transcription
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, safe(f"Transcription:\n{transcript}\n\n"))

    # Long and short questions
    bullet_section("Long Questions", long_questions)
    bullet_section("Short Questions", short_questions)

    # MCQs
    heading("Multiple Choice Questions (MCQs)")
    for mcq in mcqs:
        pdf.multi_cell(0, 10, safe(f"Q: {mcq['question']}"))
        for option in mcq["options"]:
            pdf.multi_cell(0, 10, safe(f" - {option}"))
        pdf.multi_cell(0, 10, safe(f"Answer: {mcq['answer']}\n"))

    # Save PDF. mkstemp creates the file and returns an open descriptor;
    # close it so the PDF writer can reopen the path by name.
    fd, pdf_path = tempfile.mkstemp(prefix="transcription_notes_", suffix=".pdf")
    os.close(fd)
    pdf.output(pdf_path)
    return pdf_path
# Gradio interface
def gradio_interface(audio_path):
    """Gradio callback: turn a recording into a downloadable notes PDF.

    Args:
        audio_path: Path of the recorded or uploaded audio file.

    Returns:
        Path of the generated PDF, as returned by ``transcribe``.

    Raises:
        gr.Error: If ``transcribe`` reports failure by returning a falsy
            value.
    """
    pdf_path = transcribe(audio_path)
    if not pdf_path:
        # The interface's output is a gr.File component, so returning the
        # message as a string would be treated as a file path. Raising
        # gr.Error surfaces the message to the user instead.
        raise gr.Error(
            "Error: Unable to process the audio file. Please check the API key and try again."
        )
    return pdf_path
# Build the UI: one audio input, handed to the callback as a file path, and
# one file output for the generated PDF.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.File(label="Download PDF with Notes and Questions"),
    title="Voice to Text Converter and Notes Generator",
)

# Start the server only when the file is run as a script, so importing this
# module (e.g. from a test) does not launch a web server as a side effect.
if __name__ == "__main__":
    iface.launch()