import gradio as gr
import requests
from fpdf import FPDF
import os
import tempfile
from groq import Groq
# Read the Groq API key from the environment (e.g., a Hugging Face Space secret).
api_key = os.environ.get("GROQ_API_KEY")
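# Minimal fail-fast check (an added guard, assuming a missing key should be
# surfaced early rather than as an opaque 401 from the API later):
if not api_key:
    print("Warning: GROQ_API_KEY is not set; transcription requests will fail.")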
def transcribe(audio_path):
    """Send an audio file to Groq's Whisper endpoint and return a PDF of generated notes."""
with open(audio_path, "rb") as audio_file:
audio_data = audio_file.read()
groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
headers = {
"Authorization": f"Bearer {api_key}", # Fix: api_key is used properly
}
files = {
'file': ('audio.wav', audio_data, 'audio/wav'),
}
data = {
'model': 'whisper-large-v3-turbo',
'response_format': 'json',
'language': 'en',
}
response = requests.post(groq_api_endpoint, headers=headers, files=files, data=data)
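    # Note: a hardened version would likely pass a timeout here, e.g.
    # requests.post(..., timeout=120), so a stalled upload cannot hang the app
    # indefinitely (the timeout value is an assumption, not tuned).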
if response.status_code == 200:
result = response.json()
transcript = result.get("text", "No transcription available.")
return generate_notes(transcript)
    else:
        # The error body is usually JSON, but guard against non-JSON responses.
        try:
            error_msg = response.json().get("error", {}).get("message", "Unknown error.")
        except ValueError:
            error_msg = f"HTTP {response.status_code}: {response.text[:200]}"
        print(f"API Error: {error_msg}")
        return create_error_pdf(f"API Error: {error_msg}")
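# Standalone usage sketch (assumption: a local test recording "sample.wav" exists):
#   pdf_path = transcribe("sample.wav")
#   print(f"Generated: {pdf_path}")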
def generate_notes(transcript):
    """Ask a Groq-hosted LLM to turn the transcript into notes and questions."""
client = Groq(api_key="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")
chat_completion = client.chat.completions.create(
#
# Required parameters
#
messages=[
# Set an optional system message. This sets the behavior of the
# assistant and can be used to provide specific instructions for
# how it should behave throughout the conversation.
{
"role": "system",
"content": "you are expert question generator from content. Generate one long question,possible number of short questions and mcqs.plz also provide the notes"
},
# Set a user message for the assistant to respond to.
{
"role": "user",
"content": transcript,
}
],
# The language model which will generate the completion.
model="llama3-8b-8192",
#
# Optional parameters
#
# Controls randomness: lowering results in less random completions.
# As the temperature approaches zero, the model will become deterministic
# and repetitive.
temperature=0.5,
# The maximum number of tokens to generate. Requests can use up to
# 32,768 tokens shared between prompt and completion.
max_tokens=1024,
# Controls diversity via nucleus sampling: 0.5 means half of all
# likelihood-weighted options are considered.
top_p=1,
# A stop sequence is a predefined or user-specified text string that
# signals an AI to stop generating content, ensuring its responses
# remain focused and concise. Examples include punctuation marks and
# markers like "[end]".
stop=None,
# If set, partial message deltas will be sent.
stream=False,
)
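    # If stream=True were used above, the call would instead return an iterator
    # of deltas (sketch only, not exercised here):
    #   for chunk in chat_completion:
    #       print(chunk.choices[0].delta.content or "", end="")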
    # Extract the completion text returned by the LLM.
    res = chat_completion.choices[0].message.content

    # Render the result into a structured PDF and return its path.
    pdf_path = create_pdf(res, transcript)
    return pdf_path
def create_pdf(notes, transcript):
    """Build a PDF containing the transcript plus the generated notes and questions."""
pdf = FPDF()
pdf.add_page()
# Add title
pdf.set_font("Arial", "B", 16)
pdf.cell(200, 10, "Transcription Notes and Questions", ln=True, align="C")
# Add transcription content
pdf.set_font("Arial", "", 12)
pdf.multi_cell(0, 10, f"Transcription:\n{transcript.encode('latin1', 'replace').decode('latin1')}\n\n")
    # Add the generated notes and questions
    pdf.set_font("Arial", "B", 14)
    pdf.cell(200, 10, "Notes and Questions", ln=True)
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, notes.encode('latin1', 'replace').decode('latin1'))
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
pdf.output(temp_pdf.name)
pdf_path = temp_pdf.name
return pdf_path
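# Note: FPDF's built-in core fonts only cover Latin-1, hence the
# encode('latin1', 'replace') round-trips above. A Unicode-capable sketch,
# assuming a DejaVuSans.ttf file ships alongside the app (hypothetical path):
#   pdf.add_font("DejaVu", "", "DejaVuSans.ttf", uni=True)
#   pdf.set_font("DejaVu", "", 12)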
def create_error_pdf(message):
    """Build a small PDF that reports an error message to the user."""
pdf = FPDF()
pdf.add_page()
pdf.set_font("Arial", "B", 16)
pdf.cell(200, 10, "Error Report", ln=True, align="C")
pdf.set_font("Arial", "", 12)
pdf.multi_cell(0, 10, message.encode('latin1', 'replace').decode('latin1'))
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
pdf.output(temp_pdf.name)
error_pdf_path = temp_pdf.name
return error_pdf_path
iface = gr.Interface(
fn=transcribe,
inputs=gr.Audio(type="filepath"),
outputs=gr.File(label="Download PDF with Notes or Error Report"),
title="Voice to Text Converter and Notes Generator",
description="This app converts audio to text and generates academic questions including long, short, and multiple-choice questions."
)
iface.launch()
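# When running locally rather than on Spaces, a temporary public URL can be
# requested instead (assumption: outbound tunneling is permitted):
#   iface.launch(share=True)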