File size: 6,944 Bytes
bcd1dcf
1507087
bcd1dcf
1507087
bcd1dcf
 
 
 
632a590
e584a9f
bcd1dcf
 
 
 
 
 
 
 
 
 
 
 
dfbfcd7
 
bcd1dcf
778fecb
a0f6236
bcd1dcf
 
 
 
 
 
 
 
 
 
 
 
 
 
af44622
bcd1dcf
 
 
 
1507087
af44622
778fecb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b0dd62
 
778fecb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b0dd62
778fecb
36def4c
778fecb
af44622
bcd1dcf
778fecb
bcd1dcf
 
af44622
36def4c
bcd1dcf
36def4c
bcd1dcf
36def4c
bcd1dcf
7fd87d1
9ac7792
7fd87d1
 
 
 
6f8e05f
 
bcd1dcf
778fecb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bcd1dcf
 
 
af44622
bcd1dcf
 
 
 
 
 
 
 
 
af44622
bcd1dcf
 
 
af44622
bcd1dcf
 
 
 
 
af44622
 
36def4c
bcd1dcf
612bb17
778fecb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import gradio as gr
import requests
from fpdf import FPDF
import nltk
import os
import tempfile
from nltk.tokenize import sent_tokenize
import random
from groq import Groq
# Groq API key, read from the environment so it never has to live in source.
api_key = os.environ.get("GROQ_API_KEY")

# Attempt to download the punkt tokenizer data at start-up.
# nltk.download() reports most failures (e.g. no network) by returning False
# rather than raising, so the return value is checked as well.  Only
# Exception is caught so Ctrl-C / SystemExit still propagate.
try:
    _punkt_ready = bool(nltk.download("punkt"))
except Exception as download_error:  # best-effort: never block app start-up
    print(f"NLTK punkt download raised: {download_error}")
    _punkt_ready = False

if not _punkt_ready:
    print("NLTK punkt tokenizer download failed. Using custom tokenizer.")

def custom_sent_tokenize(text):
    """Naive sentence splitter intended as a stand-in for NLTK's tokenizer.

    Splits *text* on the literal two-character separator ``". "``.  The
    separator is consumed, so only the final piece can keep a trailing
    period.  An empty string yields ``[""]``.
    """
    sentence_separator = ". "
    pieces = text.split(sentence_separator)
    return pieces

def transcribe(audio_path):
    """Transcribe an audio file via Groq's Whisper endpoint and build a PDF.

    Args:
        audio_path: Filesystem path of the audio file to transcribe.  Gradio
            may pass ``None`` when nothing was recorded or uploaded.

    Returns:
        Path to a PDF file: the notes PDF produced by ``generate_notes`` on
        success, otherwise an error report produced by ``create_error_pdf``.
        Input, network and API failures are reported through the error PDF
        rather than raised, so the UI always receives a downloadable file.
    """
    # Guard against an empty submission; open(None) would raise TypeError.
    if not audio_path:
        return create_error_pdf("No audio was provided. Please record or upload a file.")

    # Without a key the request would be sent as "Bearer None"; fail early
    # with a message that names the actual problem.
    if not api_key:
        return create_error_pdf("API Error: GROQ_API_KEY is not set in the environment.")

    try:
        with open(audio_path, "rb") as audio_file:
            audio_data = audio_file.read()
    except OSError as read_error:
        print(f"Could not read audio file: {read_error}")
        return create_error_pdf(f"Could not read audio file: {read_error}")

    groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    headers = {
        "Authorization": f"Bearer {api_key}",
    }
    files = {
        # The upload is always labelled as WAV, regardless of the source file.
        'file': ('audio.wav', audio_data, 'audio/wav'),
    }
    data = {
        'model': 'whisper-large-v3-turbo',
        'response_format': 'json',
        'language': 'en',
    }

    try:
        # (connect, read) timeouts: requests waits forever by default.
        response = requests.post(
            groq_api_endpoint,
            headers=headers,
            files=files,
            data=data,
            timeout=(10, 300),
        )
    except requests.RequestException as request_error:
        print(f"API Error: {request_error}")
        return create_error_pdf(f"API Error: {request_error}")

    # Error bodies are not guaranteed to be JSON (e.g. proxy HTML pages);
    # requests' JSON decode errors are ValueError subclasses.
    try:
        payload = response.json()
    except ValueError:
        payload = None

    if response.status_code == 200:
        transcript = "No transcription available."
        if isinstance(payload, dict):
            transcript = payload.get("text", transcript)
        return generate_notes(transcript)

    error_msg = "Unknown error."
    if isinstance(payload, dict):
        error_info = payload.get("error", {})
        # "error" is expected to be an object with a "message" field, but
        # tolerate a plain string as well.
        if isinstance(error_info, dict):
            error_msg = error_info.get("message", error_msg)
        elif error_info:
            error_msg = str(error_info)
    print(f"API Error: {error_msg}")
    return create_error_pdf(f"API Error: {error_msg}")

def generate_notes(transcript):
    """Ask a Groq-hosted LLM for questions and notes, then save them as a PDF.

    The transcript is sent as the user message of a chat completion whose
    system prompt requests one long question, short questions, MCQs and
    notes.  The model's reply and the transcript are passed to
    ``create_pdf``.

    Args:
        transcript: Transcribed text to generate study material from.

    Returns:
        Path of the PDF file written by ``create_pdf``.

    Note:
        Errors raised by the Groq client are not caught here and propagate
        to the caller.
    """
    # SECURITY: the key comes from the module-level ``api_key`` (read from
    # the GROQ_API_KEY environment variable); credentials must never be
    # hard-coded.  Any key that was previously committed in this file should
    # be treated as compromised and rotated.
    client = Groq(api_key=api_key)

    chat_completion = client.chat.completions.create(
        messages=[
            # System message: fixes the assistant's behaviour for the request.
            {
                "role": "system",
                "content": "you are expert question generator from content. Generate one long question,possible number of short questions and mcqs.plz also provide the notes"
            },
            # User message: the content the questions are generated from.
            {
                "role": "user",
                "content": transcript,
            },
        ],
        # The language model which will generate the completion.
        model="llama3-8b-8192",
        # Controls randomness: lower values give less random completions.
        temperature=0.5,
        # Upper bound on the number of tokens generated for the reply.
        max_tokens=1024,
        # Nucleus sampling threshold.
        top_p=1,
        # No custom stop sequence.
        stop=None,
        # Return the whole completion at once rather than streaming deltas.
        stream=False,
    )

    # The message content may be None; normalise to a string so the PDF
    # renderer can always encode it.
    res = chat_completion.choices[0].message.content or ""

    # Generate and save a structured PDF.
    pdf_path = create_pdf(res, transcript)
    return pdf_path

def create_pdf(question, transcript):
    """Render the transcript and the generated material into a temporary PDF.

    Args:
        question: Text written under the "Questions" heading.
        transcript: Text written after the "Transcription:" label.

    Returns:
        Path of a ``.pdf`` temporary file.  It is created with
        ``delete=False``, so it is still on disk after this function returns.
    """

    def to_latin1(text):
        # Round-trip through latin-1 so every character is representable;
        # anything outside latin-1 is substituted by the "replace" handler.
        return text.encode("latin1", "replace").decode("latin1")

    doc = FPDF()
    doc.add_page()

    # Centered document title.
    doc.set_font("Arial", "B", 16)
    doc.cell(200, 10, "Transcription Notes and Questions", ln=True, align="C")

    # Transcript section in the regular body font.
    doc.set_font("Arial", "", 12)
    transcript_section = f"Transcription:\n{to_latin1(transcript)}\n\n"
    doc.multi_cell(0, 10, transcript_section)

    # "Questions" heading followed by the generated text as a single entry.
    doc.set_font("Arial", "B", 14)
    doc.cell(200, 10, "Questions", ln=True)
    doc.set_font("Arial", "", 12)
    question_section = f"- {to_latin1(question)}\n"
    doc.multi_cell(0, 10, question_section)

    # Reserve a persistent temp-file name and write the document to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        pdf_path = temp_pdf.name
        doc.output(pdf_path)

    return pdf_path

def create_error_pdf(message):
    """Write *message* into a one-page "Error Report" PDF.

    Args:
        message: Error text to show below the report title.

    Returns:
        Path of a ``.pdf`` temporary file.  It is created with
        ``delete=False``, so it is still on disk after this function returns.
    """
    report = FPDF()
    report.add_page()

    # Centered report title.
    report.set_font("Arial", "B", 16)
    report.cell(200, 10, "Error Report", ln=True, align="C")

    # Body text, forced into latin-1 via the "replace" error handler.
    report.set_font("Arial", "", 12)
    printable_message = message.encode("latin1", "replace").decode("latin1")
    report.multi_cell(0, 10, printable_message)

    # Reserve a persistent temp-file name and write the report to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        error_pdf_path = temp_pdf.name
        report.output(error_pdf_path)

    return error_pdf_path

# Gradio UI: a single audio input, handed to transcribe() as a file path,
# and a single downloadable file output holding the PDF it returns.
_APP_TITLE = "Voice to Text Converter and Notes Generator"
_APP_DESCRIPTION = "This app converts audio to text and generates academic questions including long, short, and multiple-choice questions."

iface = gr.Interface(
    title=_APP_TITLE,
    description=_APP_DESCRIPTION,
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.File(label="Download PDF with Notes or Error Report"),
)

# Launch the interface when the script runs.
iface.launch()