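"""Voice to Text Converter and Notes Generator.

A Gradio app that transcribes an uploaded audio file with the Groq Whisper
API, extracts key sentences with NLTK, generates study questions (long,
short, and multiple choice), and returns everything as a downloadable PDF.

Expects the Groq API key in the GROQ_API_KEY environment variable
(e.g. set as a Space secret).
"""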
import os
import random
import tempfile

import gradio as gr
import nltk
import requests
from fpdf import FPDF
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
# Download the NLTK data needed for tokenization and stopword filtering.
try:
    nltk.download("punkt")
    nltk.download("stopwords")
except Exception:
    print("NLTK data download failed.")

stop_words = set(stopwords.words("english"))

def custom_sent_tokenize(text):
    """Fallback sentence splitter for when NLTK's punkt data is unavailable."""
    return text.split(". ")

def transcribe(audio_path):
    """Send the recorded audio to the Groq transcription API and build the notes PDF."""
    with open(audio_path, "rb") as audio_file:
        audio_data = audio_file.read()

    groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    headers = {
        # Read the key from the environment rather than hard-coding a secret.
        "Authorization": f"Bearer {os.environ.get('GROQ_API_KEY', '')}",
    }
    files = {
        'file': ('audio.wav', audio_data, 'audio/wav'),
    }
    data = {
        'model': 'whisper-large-v3-turbo',
        'response_format': 'json',
        'language': 'en',
    }

    response = requests.post(groq_api_endpoint, headers=headers, files=files, data=data)

    if response.status_code == 200:
        result = response.json()
        transcript = result.get("text", "No transcription available.")
        return generate_notes(transcript)
    else:
        error_msg = response.json().get("error", {}).get("message", "Unknown error.")
        print(f"API Error: {error_msg}")
        return create_error_pdf(f"API Error: {error_msg}")

def extract_key_sentences(transcript):
    """Pick the five longest sentences that contain at least one non-stopword."""
    try:
        sentences = sent_tokenize(transcript)
    except LookupError:
        sentences = custom_sent_tokenize(transcript)
    important_sentences = [
        sentence for sentence in sentences
        if any(word.lower() not in stop_words for word in word_tokenize(sentence))
    ]
    top_sentences = sorted(important_sentences, key=len, reverse=True)[:5]
    return top_sentences

def generate_questions(sentences):
    """Build long-answer, short-answer, and multiple-choice questions from key sentences."""
    long_questions = [f"Explain the importance of: '{sentence}'." for sentence in sentences]
    short_questions = [f"What does '{sentence.split()[0]}' refer to?" for sentence in sentences[:5]]

    mcqs = []
    for sentence in sentences[:5]:
        words = [word for word in word_tokenize(sentence) if word.isalpha() and word.lower() not in stop_words]
        if not words:
            continue
        key_word = random.choice(words)
        # Shuffle so the correct answer is not always the first option.
        options = [key_word] + random.sample(["Option A", "Option B", "Option C"], 3)
        random.shuffle(options)
        mcq = {
            "question": f"What is '{key_word}'?",
            "options": options,
            "answer": key_word,
        }
        mcqs.append(mcq)
    return long_questions, short_questions, mcqs

def generate_notes(transcript):
    key_sentences = extract_key_sentences(transcript)
    long_questions, short_questions, mcqs = generate_questions(key_sentences)
    pdf_path = create_pdf(transcript, long_questions, short_questions, mcqs)
    return pdf_path

def to_latin1(text):
    """FPDF's built-in fonts are Latin-1 only; replace unsupported characters."""
    return text.encode("latin1", "replace").decode("latin1")


def create_pdf(transcript, long_questions, short_questions, mcqs):
    pdf = FPDF()
    pdf.add_page()

    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Transcription Notes", ln=True, align="C")

    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, f"Transcription:\n{to_latin1(transcript)}\n\n")

    pdf.set_font("Arial", "B", 14)
    pdf.cell(200, 10, "Long Questions", ln=True)
    pdf.set_font("Arial", "", 12)
    for question in long_questions:
        pdf.multi_cell(0, 10, f"- {to_latin1(question)}\n")

    pdf.set_font("Arial", "B", 14)
    pdf.cell(200, 10, "Short Questions", ln=True)
    pdf.set_font("Arial", "", 12)
    for question in short_questions:
        pdf.multi_cell(0, 10, f"- {to_latin1(question)}\n")

    pdf.set_font("Arial", "B", 14)
    pdf.cell(200, 10, "Multiple Choice Questions (MCQs)", ln=True)
    pdf.set_font("Arial", "", 12)
    for mcq in mcqs:
        pdf.multi_cell(0, 10, f"Q: {to_latin1(mcq['question'])}")
        for option in mcq["options"]:
            pdf.multi_cell(0, 10, f" - {to_latin1(option)}")
        pdf.multi_cell(0, 10, f"Answer: {to_latin1(mcq['answer'])}\n")

    # Write to a temporary file so Gradio can serve it for download.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        pdf.output(temp_pdf.name)
        pdf_path = temp_pdf.name
    return pdf_path

def create_error_pdf(message):
    """Return a PDF containing the error message so the user still gets a file."""
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Error Report", ln=True, align="C")
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, to_latin1(message))

    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        pdf.output(temp_pdf.name)
        error_pdf_path = temp_pdf.name
    return error_pdf_path

iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.File(label="Download PDF with Notes or Error Report"),
    title="Voice to Text Converter and Notes Generator",
)

iface.launch()
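
# Local usage (assuming this file is saved as app.py in the Space):
#   GROQ_API_KEY=<your key> python app.py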