Spaces:
Sleeping
Sleeping
File size: 5,147 Bytes
bcd1dcf 1507087 bcd1dcf 1507087 bcd1dcf dfbfcd7 bcd1dcf ce01ec7 a0f6236 bcd1dcf af44622 bcd1dcf 1507087 af44622 bcd1dcf 36def4c bcd1dcf 36def4c bcd1dcf af44622 36def4c af44622 bcd1dcf af44622 36def4c bcd1dcf 36def4c bcd1dcf 36def4c bcd1dcf 36def4c bcd1dcf af44622 bcd1dcf 36def4c bcd1dcf af44622 bcd1dcf 36def4c bcd1dcf af44622 bcd1dcf af44622 bcd1dcf af44622 bcd1dcf af44622 bcd1dcf af44622 36def4c bcd1dcf 612bb17 78a2aef |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import gradio as gr
import requests
from fpdf import FPDF
import nltk
import os
import tempfile
from nltk.tokenize import sent_tokenize
import random
# Fetch the "punkt" sentence-tokenizer data used by sent_tokenize.
# The download is best-effort: if it fails, generate_notes falls back to
# custom_sent_tokenize when sent_tokenize raises LookupError.
try:
    # nltk.download signals an unsuccessful download through a falsy return
    # value, so check it instead of assuming success.
    if not nltk.download("punkt"):
        print("NLTK punkt tokenizer download failed. Using custom tokenizer.")
except Exception:
    # Deliberately not a bare `except:` so Ctrl-C / SystemExit still propagate.
    print("NLTK punkt tokenizer download failed. Using custom tokenizer.")
def custom_sent_tokenize(text):
    """Split *text* into sentences without relying on NLTK data.

    This is the fallback used when the punkt tokenizer is unavailable. The
    text is split on whitespace that follows a ``.``, ``!`` or ``?``; the
    terminating punctuation stays attached to its sentence.

    Args:
        text: The text to split.

    Returns:
        A list of non-empty sentence strings. Empty or whitespace-only input
        yields an empty list, so callers never receive blank sentences.
    """
    import re  # local import keeps this fallback self-contained

    pieces = re.split(r"(?<=[.!?])\s+", text.strip())
    return [piece for piece in pieces if piece]
def transcribe(audio_path):
    """Transcribe an audio file with the Groq API and build a notes PDF.

    Args:
        audio_path: Filesystem path of the audio file to transcribe. May be
            empty/None when no audio was supplied.

    Returns:
        Path to a PDF file: the generated notes on success, or an error
        report when the audio is missing, the API key is not configured, the
        request fails, or the API answers with a non-200 status.
    """
    if not audio_path:
        return create_error_pdf("No audio file was provided.")

    # The API key is a secret and must not live in source control; it is read
    # from the environment instead.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        return create_error_pdf(
            "API Error: the GROQ_API_KEY environment variable is not set."
        )

    with open(audio_path, "rb") as audio_file:
        audio_data = audio_file.read()

    groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    headers = {
        "Authorization": f"Bearer {api_key}",
    }
    files = {
        'file': ('audio.wav', audio_data, 'audio/wav'),
    }
    data = {
        'model': 'whisper-large-v3-turbo',
        'response_format': 'json',
        'language': 'en',
    }

    try:
        # Without a timeout a stalled connection would block the request forever.
        response = requests.post(
            groq_api_endpoint,
            headers=headers,
            files=files,
            data=data,
            timeout=120,
        )
    except requests.RequestException as exc:
        print(f"API Error: {exc}")
        return create_error_pdf(f"API Error: {exc}")

    if response.status_code == 200:
        result = response.json()
        transcript = result.get("text", "No transcription available.")
        return generate_notes(transcript)

    # Error bodies are not guaranteed to be JSON, nor to have the expected
    # {"error": {"message": ...}} shape, so parse them defensively.
    error_msg = "Unknown error."
    try:
        payload = response.json()
    except ValueError:
        payload = None
    if isinstance(payload, dict):
        error = payload.get("error", {})
        if isinstance(error, dict):
            error_msg = error.get("message", "Unknown error.")
        elif error:
            error_msg = str(error)
    print(f"API Error: {error_msg}")
    return create_error_pdf(f"API Error: {error_msg}")
def generate_notes(transcript):
    """Create study questions from a transcript and write them to a PDF.

    Long questions, short questions and multiple-choice questions are built
    from the first five non-blank sentences of the transcript.

    Args:
        transcript: The transcribed text.

    Returns:
        Path of the PDF produced by ``create_pdf``.
    """
    try:
        sentences = sent_tokenize(transcript)
    except LookupError:
        # punkt data is missing; use the dependency-free splitter instead.
        sentences = custom_sent_tokenize(transcript)

    # Drop blank sentences first: taking the first word of an empty sentence
    # would raise IndexError. Each kept sentence is paired with its words so
    # it is only split once.
    tokenized = [(sentence, sentence.split()) for sentence in sentences]
    selected = [(sentence, words) for sentence, words in tokenized if words][:5]

    # Generate long questions
    long_questions = [
        f"Explain the concept discussed in: '{sentence}'."
        for sentence, _ in selected
    ]

    # Generate short questions
    short_questions = [
        f"What does '{words[0]}' mean in the context of this text?"
        for _, words in selected
    ]

    # Generate MCQs with placeholder distractors
    mcqs = []
    for _, words in selected:
        if len(words) <= 1:
            # A single-word sentence gives no context for a meaningful MCQ.
            continue
        key_word = words[0]  # the first word is used as the key term
        options = [key_word, "Term A", "Term B", "Term C"]
        random.shuffle(options)  # avoid the answer always being listed first
        mcqs.append(
            {
                "question": f"What is '{key_word}' based on the context?",
                "options": options,
                "answer": key_word,
            }
        )

    # Generate and save a structured PDF
    return create_pdf(transcript, long_questions, short_questions, mcqs)
def create_pdf(transcript, long_questions, short_questions, mcqs):
    """Render the transcript and generated questions into a PDF file.

    Args:
        transcript: The transcribed text.
        long_questions: Iterable of long-question strings.
        short_questions: Iterable of short-question strings.
        mcqs: Iterable of dicts with ``"question"``, ``"options"`` (list of
            strings) and ``"answer"`` keys.

    Returns:
        Path of the temporary ``.pdf`` file that was written. The file is not
        deleted automatically.
    """

    def latin1_safe(text):
        # Characters that cannot be encoded as Latin-1 are replaced with "?"
        # before the text is handed to FPDF.
        return text.encode('latin1', 'replace').decode('latin1')

    pdf = FPDF()
    pdf.add_page()

    # Add title
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Transcription Notes and Questions", ln=True, align="C")

    # Add transcription content
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, f"Transcription:\n{latin1_safe(transcript)}\n\n")

    # Long and short questions share the same layout: heading, then bullets.
    question_sections = (
        ("Long Questions", long_questions),
        ("Short Questions", short_questions),
    )
    for heading, questions in question_sections:
        pdf.set_font("Arial", "B", 14)
        pdf.cell(200, 10, heading, ln=True)
        pdf.set_font("Arial", "", 12)
        for question in questions:
            pdf.multi_cell(0, 10, f"- {latin1_safe(question)}\n")

    # Add MCQs
    pdf.set_font("Arial", "B", 14)
    pdf.cell(200, 10, "Multiple Choice Questions (MCQs)", ln=True)
    pdf.set_font("Arial", "", 12)
    for mcq in mcqs:
        pdf.multi_cell(0, 10, f"Q: {latin1_safe(mcq['question'])}")
        for option in mcq["options"]:
            pdf.multi_cell(0, 10, f" - {latin1_safe(option)}")
        pdf.multi_cell(0, 10, f"Answer: {latin1_safe(mcq['answer'])}\n")

    # Reserve a unique path, then close the handle BEFORE writing: reopening a
    # still-open NamedTemporaryFile by name is not portable (fails on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        pdf_path = temp_pdf.name
    pdf.output(pdf_path)
    return pdf_path
def create_error_pdf(message):
    """Write *message* into a one-page "Error Report" PDF.

    Args:
        message: Human-readable error description. Characters that cannot be
            encoded as Latin-1 are replaced with "?".

    Returns:
        Path of the temporary ``.pdf`` file that was written. The file is not
        deleted automatically.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Error Report", ln=True, align="C")
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, message.encode('latin1', 'replace').decode('latin1'))

    # Reserve a unique path, then close the handle BEFORE writing: reopening a
    # still-open NamedTemporaryFile by name is not portable (fails on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        error_pdf_path = temp_pdf.name
    pdf.output(error_pdf_path)
    return error_pdf_path
# Gradio UI wiring: the audio input is passed to `transcribe` as a file path,
# and the PDF path it returns is offered to the user as a downloadable file.
audio_input = gr.Audio(type="filepath")
pdf_output = gr.File(label="Download PDF with Notes or Error Report")

iface = gr.Interface(
    transcribe,
    audio_input,
    pdf_output,
    title="Voice to Text Converter and Notes Generator",
    description=(
        "This app converts audio to text and generates academic questions "
        "including long, short, and multiple-choice questions."
    ),
)

# Start the web server as soon as the script is executed.
iface.launch()
|