Spaces:
Sleeping
Sleeping
File size: 3,967 Bytes
309b067 ae43f08 042bc75 309b067 1bcb7e9 93cef8c 1bcb7e9 f3f5ab6 93cef8c 1bcb7e9 f3f5ab6 1bcb7e9 ae43f08 85eb5ef a78e93c 1bcb7e9 a78e93c f3f5ab6 042bc75 ae43f08 0fe9a40 1bcb7e9 a78e93c 1bcb7e9 ae43f08 1bcb7e9 ae43f08 f3f5ab6 1bcb7e9 85eb5ef 042bc75 ae43f08 1bcb7e9 309b067 1bcb7e9 042bc75 309b067 042bc75 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
import gradio as gr
import requests
from fpdf import FPDF
import nltk
from nltk.tokenize import sent_tokenize
import random
import os
# Ensure the NLTK sentence-tokenizer data is available. Recent NLTK releases
# load the "punkt_tab" resource instead of the pickled "punkt" model, so both
# are requested; older releases keep using "punkt".
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource)
# Function to send audio to Groq API and get transcription
def transcribe(audio_path):
    """Transcribe an audio file with the Groq API and build the notes PDF.

    The API key is read from the ``GROQ_API_KEY`` environment variable.
    NOTE(review): a key was previously embedded in this file; it should be
    treated as compromised and rotated.

    Args:
        audio_path: Path of the audio file to upload.

    Returns:
        Whatever ``generate_notes`` returns for the transcript (the path of
        the generated PDF), or ``None`` when no audio path or API key is
        available, the HTTP request fails, or the API responds with a
        status other than 200.
    """
    if not audio_path:
        print("API Error: no audio file was provided.")
        return None

    # Credentials must not live in source control; take them from the environment.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        print("API Error: GROQ_API_KEY environment variable is not set.")
        return None

    # Read audio file in binary mode
    with open(audio_path, "rb") as audio_file:
        audio_data = audio_file.read()

    # Groq API endpoint for audio transcription
    groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    headers = {
        "Authorization": f"Bearer {api_key}",
    }
    files = {
        "file": ("audio.wav", audio_data, "audio/wav"),
    }
    data = {
        "model": "whisper-large-v3-turbo",
        "response_format": "json",
        "language": "en",
    }

    # Send audio to Groq API. Without a timeout a stalled connection would
    # block the worker forever.
    try:
        response = requests.post(
            groq_api_endpoint,
            headers=headers,
            files=files,
            data=data,
            timeout=120,
        )
    except requests.RequestException as exc:
        print(f"API Error: {exc}")
        return None

    if response.status_code == 200:
        transcript = response.json().get("text", "No transcription available.")
        return generate_notes(transcript)

    # The error body is not guaranteed to be JSON (e.g. a proxy error page)
    # or to have the {"error": {"message": ...}} shape.
    try:
        error_msg = response.json().get("error", {}).get("message", "Unknown error.")
    except (ValueError, AttributeError):
        error_msg = f"HTTP {response.status_code}"
    print(f"API Error: {error_msg}")
    return None  # Indicate failure
# Function to generate notes and questions
def generate_notes(transcript):
    """Build questions from a transcript and write them to a PDF.

    The first five non-blank sentences of the transcript are turned into
    one long question, one short question and one multiple-choice question
    each. The short and multiple-choice questions are keyed on the first
    whitespace-separated word of the sentence.

    Args:
        transcript: The transcribed text.

    Returns:
        The value returned by ``create_pdf`` (the path of the written PDF).
    """
    # Split transcript into sentences; drop blank ones so that taking the
    # first word below cannot raise IndexError.
    sentences = [s for s in sent_tokenize(transcript) if s.split()][:5]
    first_words = [s.split()[0] for s in sentences]

    # Generate long and short questions
    long_questions = [f"What is meant by '{sentence}'?" for sentence in sentences]
    short_questions = [f"Define '{word}'." for word in first_words]

    # Generate MCQs. Shuffle all four options together; otherwise the
    # correct answer would always be listed first.
    mcqs = []
    for word in first_words:
        options = [word, "Option 1", "Option 2", "Option 3"]
        random.shuffle(options)
        mcqs.append(
            {
                "question": f"What is '{word}'?",
                "options": options,
                "answer": word,
            }
        )

    # Create PDF
    return create_pdf(transcript, long_questions, short_questions, mcqs)
# Function to create and save PDF
def create_pdf(transcript, long_questions, short_questions, mcqs):
    """Render the transcript and generated questions into a PDF file.

    Args:
        transcript: Full transcription text.
        long_questions: Iterable of long-question strings.
        short_questions: Iterable of short-question strings.
        mcqs: Iterable of dicts with "question", "options" and "answer" keys.

    Returns:
        Path of the written PDF. This is
        "/mnt/data/transcription_notes.pdf" when that directory can be
        created and written to, otherwise a file of the same name in the
        system temporary directory.
    """
    import tempfile

    def latin1(text):
        # The built-in "Arial" core font only covers Latin-1; replace any
        # other character with "?" instead of failing on encoding.
        return str(text).encode("latin-1", "replace").decode("latin-1")

    def section_heading(title):
        # Bold heading line, then switch back to the body font.
        pdf.set_font("Arial", "B", 14)
        pdf.cell(200, 10, title, ln=True)
        pdf.set_font("Arial", "", 12)

    pdf = FPDF()
    pdf.add_page()

    # Title
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Transcription Notes", ln=True, align="C")

    # Transcription
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, latin1(f"Transcription:\n{transcript}\n\n"))

    # Long Questions
    section_heading("Long Questions")
    for question in long_questions:
        pdf.multi_cell(0, 10, latin1(f"- {question}\n"))

    # Short Questions
    section_heading("Short Questions")
    for question in short_questions:
        pdf.multi_cell(0, 10, latin1(f"- {question}\n"))

    # MCQs
    section_heading("Multiple Choice Questions (MCQs)")
    for mcq in mcqs:
        pdf.multi_cell(0, 10, latin1(f"Q: {mcq['question']}"))
        for option in mcq["options"]:
            pdf.multi_cell(0, 10, latin1(f" - {option}"))
        pdf.multi_cell(0, 10, latin1(f"Answer: {mcq['answer']}\n"))

    # Save PDF. The preferred directory may be missing or read-only on the
    # host, so fall back to the system temp directory in that case.
    pdf_dir = "/mnt/data"
    try:
        os.makedirs(pdf_dir, exist_ok=True)
        writable = os.access(pdf_dir, os.W_OK)
    except OSError:
        writable = False
    if not writable:
        pdf_dir = tempfile.gettempdir()

    pdf_path = os.path.join(pdf_dir, "transcription_notes.pdf")
    pdf.output(pdf_path)
    return pdf_path
# Gradio interface
def gradio_interface(audio_path):
    """Gradio callback: turn an uploaded audio file into a notes PDF.

    Args:
        audio_path: Path of the recorded or uploaded audio file.

    Returns:
        Path of the generated PDF, as returned by ``transcribe``.

    Raises:
        gr.Error: If ``transcribe`` returns a falsy value. The output
            component is a ``gr.File``, so returning the error text would
            be treated as a file path; raising ``gr.Error`` reports the
            message to the user instead.
    """
    pdf_path = transcribe(audio_path)
    if not pdf_path:
        raise gr.Error(
            "Error: Unable to process the audio file. Please check the API key and try again."
        )
    return pdf_path
# Audio in (as a file path), downloadable PDF out.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.File(label="Download PDF with Notes and Questions"),
    title="Voice to Text Converter and Notes Generator",
)

# Start the web server only when executed as a script, so that importing
# this module (e.g. from tests) does not launch it as a side effect.
if __name__ == "__main__":
    iface.launch()
|