Spaces:
Sleeping
Sleeping
File size: 3,874 Bytes
bcd1dcf 1507087 bcd1dcf 1507087 bcd1dcf 632a590 e584a9f bcd1dcf dfbfcd7 bcd1dcf e584a9f a0f6236 bcd1dcf af44622 bcd1dcf 1507087 af44622 e584a9f 2b0dd62 e584a9f 2b0dd62 36def4c e584a9f af44622 bcd1dcf e584a9f bcd1dcf af44622 36def4c bcd1dcf 36def4c bcd1dcf 36def4c bcd1dcf 7fd87d1 9ac7792 7fd87d1 6f8e05f bcd1dcf af44622 bcd1dcf af44622 bcd1dcf af44622 bcd1dcf af44622 36def4c bcd1dcf 612bb17 78a2aef |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import gradio as gr
import requests
from fpdf import FPDF
import nltk
import os
import tempfile
from nltk.tokenize import sent_tokenize
import random
from groq import Groq
# Ensure no unexpected indentation here
# Groq API key read from the environment; None when GROQ_API_KEY is unset.
api_key = os.environ.get("GROQ_API_KEY")

# Best-effort download of the NLTK "punkt" tokenizer data at import time.
try:
    nltk.download("punkt")
except Exception:
    # Catch Exception instead of using a bare ``except`` so that
    # KeyboardInterrupt and SystemExit still propagate during startup.
    print("NLTK punkt tokenizer download failed. Using custom tokenizer.")
def custom_sent_tokenize(text):
    """Split ``text`` into rough sentences on the literal separator ". ".

    A minimal tokenizer that needs no NLTK data. The separator itself is
    consumed, so every piece except the last loses its trailing period.

    Args:
        text: The string to split.

    Returns:
        A list of the pieces between separators; a string without the
        separator comes back as a single-element list.
    """
    sentence_break = ". "
    pieces = text.split(sentence_break)
    return pieces
def transcribe(audio_path):
    """Transcribe an audio file via Groq's transcription API and build a PDF.

    Args:
        audio_path: Filesystem path of the audio file to upload. A falsy
            value (e.g. None when no audio was submitted) yields an error
            report instead of an exception.

    Returns:
        Path of a PDF file: the output of ``generate_notes`` on success,
        otherwise an error report produced by ``create_error_pdf``.
    """
    # Guard clause: without a path there is nothing to open or upload.
    if not audio_path:
        return create_error_pdf("No audio file was provided.")

    with open(audio_path, "rb") as audio_file:
        audio_data = audio_file.read()

    groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    headers = {
        "Authorization": f"Bearer {api_key}",
    }
    files = {
        'file': ('audio.wav', audio_data, 'audio/wav'),
    }
    data = {
        'model': 'whisper-large-v3-turbo',
        'response_format': 'json',
        'language': 'en',
    }

    try:
        # requests has no default timeout; without one a stalled connection
        # would block this handler forever. (connect, read) in seconds.
        response = requests.post(
            groq_api_endpoint,
            headers=headers,
            files=files,
            data=data,
            timeout=(10, 300),
        )
    except requests.RequestException as exc:
        # Connection failures and timeouts become an error report, matching
        # how API-level errors are reported below.
        print(f"Request Error: {exc}")
        return create_error_pdf(f"Request Error: {exc}")

    if response.status_code == 200:
        result = response.json()
        transcript = result.get("text", "No transcription available.")
        return generate_notes(transcript)

    # The error body is not guaranteed to be JSON, nor to have the
    # {"error": {"message": ...}} shape, so probe it defensively.
    error_msg = "Unknown error."
    try:
        payload = response.json()
    except ValueError:
        payload = None
    if isinstance(payload, dict):
        error = payload.get("error")
        if isinstance(error, dict):
            error_msg = error.get("message", error_msg)

    print(f"API Error: {error_msg}")
    return create_error_pdf(f"API Error: {error_msg}")
def generate_notes(transcript):
    """Ask the Groq chat model for questions and notes, then render a PDF.

    Args:
        transcript: Text sent to the model as the user message.

    Returns:
        The path returned by ``create_pdf`` for the model's reply and the
        transcript.
    """
    system_prompt = "you are expert question generator from content. Generate one long question, possible number of short questions and mcqs. plz also provide the notes"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": transcript},
    ]

    groq_client = Groq(api_key=api_key)
    completion = groq_client.chat.completions.create(
        model="llama3-8b-8192",
        messages=conversation,
        temperature=0.5,
        max_tokens=1024,
        top_p=1,
        stop=None,
        stream=False,
    )

    # Only the first choice's text is used.
    reply_text = completion.choices[0].message.content

    # Produce the structured PDF and hand its path back to the caller.
    return create_pdf(reply_text, transcript)
def create_pdf(question, transcript):
    """Render the transcript and the generated questions into a PDF file.

    Text is round-tripped through Latin-1 with ``errors='replace'``, so any
    character outside Latin-1 is written as "?".

    Args:
        question: Generated text printed under the "Questions" heading.
        transcript: Transcription text printed first.

    Returns:
        Path of a temporary ``.pdf`` file. It is created with
        ``delete=False`` and is therefore not removed automatically.
    """
    pdf = FPDF()
    pdf.add_page()

    # Title
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Transcription Notes and Questions", ln=True, align="C")

    # Transcription section
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, f"Transcription:\n{transcript.encode('latin1', 'replace').decode('latin1')}\n\n")

    # Questions section
    pdf.set_font("Arial", "B", 14)
    pdf.cell(200, 10, "Questions", ln=True)
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, f"- {question.encode('latin1', 'replace').decode('latin1')}\n")

    # Reserve a unique path, then close the handle BEFORE writing: reopening
    # a NamedTemporaryFile by name while it is still open is not portable
    # (it fails on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        pdf_path = temp_pdf.name
    pdf.output(pdf_path)
    return pdf_path
def create_error_pdf(message):
    """Write ``message`` into a one-page "Error Report" PDF.

    The message is round-tripped through Latin-1 with ``errors='replace'``,
    so any character outside Latin-1 is written as "?".

    Args:
        message: Error text to print below the title.

    Returns:
        Path of a temporary ``.pdf`` file. It is created with
        ``delete=False`` and is therefore not removed automatically.
    """
    pdf = FPDF()
    pdf.add_page()

    # Title
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Error Report", ln=True, align="C")

    # Error text
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, message.encode('latin1', 'replace').decode('latin1'))

    # Reserve a unique path, then close the handle BEFORE writing: reopening
    # a NamedTemporaryFile by name while it is still open is not portable
    # (it fails on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        error_pdf_path = temp_pdf.name
    pdf.output(error_pdf_path)
    return error_pdf_path
# Gradio UI: one audio input (delivered to ``transcribe`` as a file path)
# and one downloadable file output.
iface = gr.Interface(
    title="Voice to Text Converter and Notes Generator",
    description=(
        "This app converts audio to text and generates academic questions "
        "including long, short, and multiple-choice questions."
    ),
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.File(label="Download PDF with Notes or Error Report"),
)

# Launch the interface as soon as this module is executed.
iface.launch()
|