Voice-To-Text

Running

File size: 4,112 Bytes

309b067
ae43f08
042bc75
 
 
 
 
 
 
 
309b067
1bcb7e9
93cef8c
1bcb7e9
f3f5ab6
 
93cef8c
f3f5ab6
ae43f08
bbee055
a78e93c
1bcb7e9
 
 
 
a78e93c
f3f5ab6
042bc75
 
ae43f08
0fe9a40
1bcb7e9
a78e93c
1bcb7e9
ae43f08
 
1bcb7e9
 
ae43f08
f3f5ab6
1bcb7e9
bbee055
85eb5ef
042bc75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbee055
042bc75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbee055
 
 
 
 
 
 
 
 
 
 
 
1bcb7e9
bbee055
309b067
bbee055
042bc75
bbee055
042bc75
309b067
 
042bc75

import gradio as gr
import requests
from fpdf import FPDF
import nltk
from nltk.tokenize import sent_tokenize
import random
import os

# Ensure the NLTK sentence-tokenizer data is available before the first request.
# "punkt" is the legacy pickled model; recent NLTK releases make sent_tokenize
# load the "punkt_tab" resource instead, so both are fetched to keep the app
# working regardless of which NLTK version is installed.
for _resource in ("punkt", "punkt_tab"):
    nltk.download(_resource)

# Function to send audio to Groq API and get transcription
def transcribe(audio_path):
    """Transcribe an audio file with the Groq API and turn it into a notes PDF.

    Args:
        audio_path: Filesystem path of the recording to transcribe. A falsy
            value (for example ``None``) produces an error report instead of
            raising.

    Returns:
        The path returned by ``generate_notes`` on success, otherwise the path
        returned by ``create_error_pdf`` describing what went wrong.
    """
    if not audio_path:
        return create_error_pdf("No audio was provided.")

    # The credential is read from the environment: a key committed to source
    # is exposed to everyone who can read the file and must be rotated.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        print("API Error: GROQ_API_KEY is not set.")
        return create_error_pdf(
            "API Error: the GROQ_API_KEY environment variable is not set."
        )

    # Read audio file in binary mode
    try:
        with open(audio_path, "rb") as audio_file:
            audio_data = audio_file.read()
    except OSError as exc:
        print(f"File Error: {exc}")
        return create_error_pdf(f"File Error: {exc}")

    groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    headers = {
        "Authorization": f"Bearer {api_key}",
    }
    files = {
        'file': ('audio.wav', audio_data, 'audio/wav'),
    }
    data = {
        'model': 'whisper-large-v3-turbo',
        'response_format': 'json',
        'language': 'en',
    }

    # Send audio to Groq API. Without a timeout a stalled connection would
    # block the worker forever; connection failures become an error report.
    try:
        response = requests.post(
            groq_api_endpoint,
            headers=headers,
            files=files,
            data=data,
            timeout=120,
        )
    except requests.RequestException as exc:
        print(f"API Error: {exc}")
        return create_error_pdf(f"API Error: {exc}")

    # The body is not guaranteed to be JSON (e.g. an HTML page from a proxy).
    try:
        payload = response.json()
    except ValueError:
        payload = None

    if response.status_code == 200 and isinstance(payload, dict):
        # A missing, null or empty "text" field falls back to the placeholder.
        transcript = payload.get("text") or "No transcription available."
        return generate_notes(transcript)

    # Extract the most specific message available without assuming a schema.
    error = payload.get("error") if isinstance(payload, dict) else None
    if isinstance(error, dict):
        error_msg = error.get("message", "Unknown error.")
    elif error:
        error_msg = str(error)
    else:
        error_msg = f"Unknown error (HTTP {response.status_code})."
    print(f"API Error: {error_msg}")
    return create_error_pdf(f"API Error: {error_msg}")

# Function to generate notes and questions
def generate_notes(transcript):
    """Build questions from a transcript and render everything to a PDF.

    The first five non-blank sentences of ``transcript`` each yield one long
    question (about the whole sentence), one short question and one
    multiple-choice question (both about the sentence's first word).

    Args:
        transcript: The transcribed text.

    Returns:
        The path returned by ``create_pdf``.
    """
    # Sentences without any word are skipped so taking the first word below
    # can never raise IndexError.
    sentences = [s for s in sent_tokenize(transcript) if s.split()][:5]
    first_words = [sentence.split()[0] for sentence in sentences]

    # Generate long and short questions
    long_questions = [f"What is meant by '{sentence}'?" for sentence in sentences]
    short_questions = [f"Define '{word}'." for word in first_words]

    # Generate MCQs
    mcqs = []
    for word in first_words:
        options = [word, "Option 1", "Option 2", "Option 3"]
        # Shuffle the full list: otherwise the correct answer is always the
        # first option and the question gives itself away.
        random.shuffle(options)
        mcqs.append(
            {
                "question": f"What is '{word}'?",
                "options": options,
                "answer": word,
            }
        )

    # Create PDF
    return create_pdf(transcript, long_questions, short_questions, mcqs)

# Function to create a PDF for transcription and questions
def create_pdf(transcript, long_questions, short_questions, mcqs):
    """Write the transcript and the generated questions to a PDF file.

    Args:
        transcript: Full transcribed text, printed first.
        long_questions: Iterable of question strings for the "Long Questions"
            section.
        short_questions: Iterable of question strings for the "Short
            Questions" section.
        mcqs: Iterable of dicts with the keys ``"question"``, ``"options"``
            (an iterable of strings) and ``"answer"``.

    Returns:
        Path of the written PDF, inside a freshly created temporary directory.
    """
    import tempfile

    def latin1(text):
        # The built-in Arial font only covers Latin-1. Map the typographic
        # punctuation that transcripts commonly contain to ASCII, then
        # replace anything still unsupported with "?" instead of crashing.
        text = str(text)
        for fancy, plain in (
            ("\u2018", "'"),
            ("\u2019", "'"),
            ("\u201c", '"'),
            ("\u201d", '"'),
            ("\u2013", "-"),
            ("\u2014", "-"),
            ("\u2026", "..."),
        ):
            text = text.replace(fancy, plain)
        return text.encode("latin-1", "replace").decode("latin-1")

    def heading(text):
        # Width 0 stretches the cell to the right margin.
        pdf.set_font("Arial", "B", 14)
        pdf.cell(0, 10, latin1(text), ln=True)
        pdf.set_font("Arial", "", 12)

    pdf = FPDF()
    pdf.add_page()

    # Title (width 0 keeps the centring relative to the printable area;
    # a fixed 200 mm cell is wider than that area and sits off-centre)
    pdf.set_font("Arial", "B", 16)
    pdf.cell(0, 10, "Transcription Notes", ln=True, align="C")

    # Transcription
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, latin1(f"Transcription:\n{transcript}\n\n"))

    # Long Questions
    heading("Long Questions")
    for question in long_questions:
        pdf.multi_cell(0, 10, latin1(f"- {question}\n"))

    # Short Questions
    heading("Short Questions")
    for question in short_questions:
        pdf.multi_cell(0, 10, latin1(f"- {question}\n"))

    # MCQs
    heading("Multiple Choice Questions (MCQs)")
    for mcq in mcqs:
        pdf.multi_cell(0, 10, latin1(f"Q: {mcq['question']}"))
        for option in mcq["options"]:
            pdf.multi_cell(0, 10, latin1(f"   - {option}"))
        pdf.multi_cell(0, 10, latin1(f"Answer: {mcq['answer']}\n"))

    # Save PDF. A per-call temporary directory always exists, is writable,
    # and stops concurrent requests from overwriting each other's file; the
    # fixed "/mnt/data" directory is not guaranteed to exist.
    pdf_path = os.path.join(
        tempfile.mkdtemp(prefix="voice_notes_"), "transcription_notes.pdf"
    )
    pdf.output(pdf_path)

    return pdf_path

# Function to create an error PDF
def create_error_pdf(message):
    """Write a one-page "Error Report" PDF containing ``message``.

    Args:
        message: Human-readable description of the failure.

    Returns:
        Path of the written PDF, inside a freshly created temporary directory.
    """
    import tempfile

    # The built-in Arial font only covers Latin-1; replace unsupported
    # characters with "?" so building the error report cannot itself fail.
    safe_message = str(message).encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", "B", 16)
    # Width 0 stretches the cell to the right margin, so the title is centred
    # on the printable area.
    pdf.cell(0, 10, "Error Report", ln=True, align="C")
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, safe_message)

    # A per-call temporary directory always exists and is writable, and it
    # keeps concurrent requests from overwriting each other's report; the
    # fixed "/mnt/data" directory is not guaranteed to exist.
    error_pdf_path = os.path.join(
        tempfile.mkdtemp(prefix="voice_notes_"), "error_report.pdf"
    )
    pdf.output(error_pdf_path)
    return error_pdf_path

# Gradio interface
# Input: an audio component that hands the handler a file path.
audio_input = gr.Audio(type="filepath")
# Output: a downloadable file (the notes PDF or the error report).
pdf_output = gr.File(label="Download PDF with Notes or Error Report")

# Wire the components to the transcription handler.
iface = gr.Interface(
    title="Voice to Text Converter and Notes Generator",
    fn=transcribe,
    inputs=audio_input,
    outputs=pdf_output,
)

# Start the web UI.
iface.launch()