Voice-To-Text

Sleeping

App Files Files Community

Voice-To-Text / app.py

lodhrangpt

Update app.py

1bcb7e9 verified about 1 year ago

raw

history blame

3.97 kB

	import gradio as gr
	import requests
	from fpdf import FPDF
	import nltk
	from nltk.tokenize import sent_tokenize
	import random
	import os

	# Ensure the sentence-tokenizer data used by sent_tokenize is available.
	# Older NLTK releases load "punkt"; newer releases look up "punkt_tab"
	# instead, so fetch both to work across versions.
	nltk.download("punkt")
	nltk.download("punkt_tab")

	# Function to send audio to Groq API and get transcription
	def transcribe(audio_path):
	    """Transcribe an audio file via the Groq API and build the notes PDF.

	    Args:
	        audio_path: Path of the audio file to upload.

	    Returns:
	        Whatever ``generate_notes`` returns for the transcript on success,
	        or ``None`` when the API key is missing, the request cannot be
	        completed, or the API answers with a non-200 status.
	    """
	    # Read the key from the environment so no secret is stored in the source.
	    api_key = os.environ.get("GROQ_API_KEY")
	    if not api_key:
	        print("API Error: GROQ_API_KEY environment variable is not set.")
	        return None

	    # Read audio file in binary mode
	    with open(audio_path, "rb") as audio_file:
	        audio_data = audio_file.read()

	    # Groq API endpoint for audio transcription
	    groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
	    headers = {"Authorization": f"Bearer {api_key}"}
	    files = {
	        "file": ("audio.wav", audio_data, "audio/wav"),
	    }
	    data = {
	        "model": "whisper-large-v3-turbo",
	        "response_format": "json",
	        "language": "en",
	    }

	    # Send audio to Groq API. requests has no default timeout, so set one
	    # to keep a stalled connection from hanging the request forever.
	    try:
	        response = requests.post(
	            groq_api_endpoint,
	            headers=headers,
	            files=files,
	            data=data,
	            timeout=120,
	        )
	    except requests.RequestException as exc:
	        print(f"API Error: {exc}")
	        return None

	    if response.status_code == 200:
	        transcript = response.json().get("text", "No transcription available.")
	        return generate_notes(transcript)

	    # Error bodies are not guaranteed to be JSON or to have the expected
	    # {"error": {"message": ...}} shape, so decode defensively.
	    try:
	        payload = response.json()
	    except ValueError:
	        payload = {}
	    error = payload.get("error", {}) if isinstance(payload, dict) else {}
	    if isinstance(error, dict):
	        error_msg = error.get("message", "Unknown error.")
	    else:
	        error_msg = str(error)
	    print(f"API Error: {error_msg}")
	    return None  # Indicate failure

	# Function to generate notes and questions
	def generate_notes(transcript):
	    """Derive practice questions from a transcript and write them to a PDF.

	    Up to the first five sentences of the transcript are used. Each one
	    yields a long question (about the whole sentence), a short question and
	    a multiple-choice question (both about the sentence's first word).

	    Args:
	        transcript: The transcribed text.

	    Returns:
	        The value returned by ``create_pdf`` (the path of the generated PDF).
	    """
	    # Keep only sentences with at least one word so taking the first word
	    # below can never raise IndexError.
	    sentences = [s for s in sent_tokenize(transcript)[:5] if s.split()]

	    long_questions = []
	    short_questions = []
	    mcqs = []
	    for sentence in sentences:
	        keyword = sentence.split()[0]
	        long_questions.append(f"What is meant by '{sentence}'?")
	        short_questions.append(f"Define '{keyword}'.")

	        # Shuffle all four options together; otherwise the correct answer
	        # would always be the first one listed.
	        options = [keyword, "Option 1", "Option 2", "Option 3"]
	        random.shuffle(options)
	        mcqs.append(
	            {
	                "question": f"What is '{keyword}'?",
	                "options": options,
	                "answer": keyword,
	            }
	        )

	    # Create PDF
	    return create_pdf(transcript, long_questions, short_questions, mcqs)

	# Function to create and save PDF
	def create_pdf(transcript, long_questions, short_questions, mcqs):
	    """Render the transcript and the generated questions into a PDF file.

	    Args:
	        transcript: Full transcription text.
	        long_questions: Iterable of long-question strings.
	        short_questions: Iterable of short-question strings.
	        mcqs: Iterable of dicts with "question", "options" and "answer" keys.

	    Returns:
	        Path of the PDF, written to a newly created temporary file.
	    """
	    import tempfile  # local import: only this function needs it

	    def printable(text):
	        # The built-in "Arial" core font is limited to Latin-1; characters
	        # outside it (e.g. curly quotes) would abort PDF generation, so
	        # they are replaced with "?".
	        return str(text).encode("latin-1", "replace").decode("latin-1")

	    pdf = FPDF()
	    pdf.add_page()

	    def heading(text):
	        # Bold section heading, then switch back to the body font.
	        pdf.set_font("Arial", "B", 14)
	        pdf.cell(200, 10, text, ln=True)
	        pdf.set_font("Arial", "", 12)

	    # Title
	    pdf.set_font("Arial", "B", 16)
	    pdf.cell(200, 10, "Transcription Notes", ln=True, align="C")

	    # Transcription
	    pdf.set_font("Arial", "", 12)
	    pdf.multi_cell(0, 10, printable(f"Transcription:\n{transcript}\n\n"))

	    # Long Questions
	    heading("Long Questions")
	    for question in long_questions:
	        pdf.multi_cell(0, 10, printable(f"- {question}\n"))

	    # Short Questions
	    heading("Short Questions")
	    for question in short_questions:
	        pdf.multi_cell(0, 10, printable(f"- {question}\n"))

	    # MCQs
	    heading("Multiple Choice Questions (MCQs)")
	    for mcq in mcqs:
	        pdf.multi_cell(0, 10, printable(f"Q: {mcq['question']}"))
	        for option in mcq["options"]:
	            pdf.multi_cell(0, 10, printable(f" - {option}"))
	        pdf.multi_cell(0, 10, printable(f"Answer: {mcq['answer']}\n"))

	    # Save PDF to a unique temporary file: a fixed path may not exist on
	    # the host and would be overwritten by concurrent requests.
	    fd, pdf_path = tempfile.mkstemp(prefix="transcription_notes_", suffix=".pdf")
	    os.close(fd)
	    pdf.output(pdf_path)

	    return pdf_path

	# Gradio interface
	def gradio_interface(audio_path):
	    """Gradio callback: turn the submitted audio into a downloadable PDF.

	    Args:
	        audio_path: File path of the recorded/uploaded audio, as supplied by
	            the ``gr.Audio(type="filepath")`` input.

	    Returns:
	        Path of the generated PDF for the ``gr.File`` output.

	    Raises:
	        gr.Error: If no audio was supplied or the transcription failed. The
	            output component is a file, so an error message must not be
	            returned as if it were a file path.
	    """
	    if not audio_path:
	        raise gr.Error("Error: No audio provided. Please record or upload an audio file.")

	    pdf_path = transcribe(audio_path)
	    if not pdf_path:
	        raise gr.Error(
	            "Error: Unable to process the audio file. Please check the API key and try again."
	        )
	    return pdf_path

	# Wire the callback into a single-input, single-output Gradio app:
	# audio (passed to the callback as a file path) in, downloadable PDF out.
	iface = gr.Interface(
	    title="Voice to Text Converter and Notes Generator",
	    fn=gradio_interface,
	    inputs=gr.Audio(type="filepath"),
	    outputs=gr.File(label="Download PDF with Notes and Questions"),
	)

	# Start serving the app.
	iface.launch()