lodhrangpt committed on
Commit 042bc75 · verified · 1 Parent(s): 293e06b

Update app.py

Files changed (1)
  1. app.py +83 -12
app.py CHANGED
@@ -1,5 +1,13 @@
 import gradio as gr
 import requests
+from fpdf import FPDF
+import nltk
+from nltk.tokenize import sent_tokenize
+import random
+import os
+
+# Ensure nltk resources are downloaded
+nltk.download("punkt")

 # Function to send audio to Groq API and get transcription
 def transcribe(audio_path):
@@ -10,38 +18,101 @@ def transcribe(audio_path):
     # Groq API endpoint for audio transcription
     groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"

-    # Replace 'YOUR_GROQ_API_KEY' with your actual Groq API key
     headers = {
         "Authorization": "Bearer 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",
     }

-    # Prepare the files and data for the request
     files = {
         'file': ('audio.wav', audio_data, 'audio/wav'),
     }
     data = {
-        'model': 'whisper-large-v3-turbo',  # Specify the model to use
-        'response_format': 'json',  # Desired response format
-        'language': 'en',  # Language of the audio
+        'model': 'whisper-large-v3-turbo',
+        'response_format': 'json',
+        'language': 'en',
     }

     # Send audio to Groq API
     response = requests.post(groq_api_endpoint, headers=headers, files=files, data=data)

-    # Parse response
     if response.status_code == 200:
         result = response.json()
-        return result.get("text", "No transcription available.")
+        transcript = result.get("text", "No transcription available.")
+        return generate_notes(transcript)
     else:
         return f"Error: {response.status_code}, {response.text}"

+# Function to generate notes and questions
+def generate_notes(transcript):
+    # Split transcript into sentences
+    sentences = sent_tokenize(transcript)
+
+    # Generate long and short questions
+    long_questions = [f"What is meant by '{sentence}'?" for sentence in sentences[:5]]
+    short_questions = [f"Define '{sentence.split()[0]}'." for sentence in sentences[:5]]
+
+    # Generate MCQs
+    mcqs = []
+    for sentence in sentences[:5]:
+        mcq = {
+            "question": f"What is '{sentence.split()[0]}'?",
+            "options": [sentence.split()[0]] + random.sample(["Option 1", "Option 2", "Option 3"], 3),
+            "answer": sentence.split()[0]
+        }
+        mcqs.append(mcq)
+
+    # Create PDF
+    pdf_path = create_pdf(transcript, long_questions, short_questions, mcqs)
+    return pdf_path
+
+# Function to create and save PDF
+def create_pdf(transcript, long_questions, short_questions, mcqs):
+    pdf = FPDF()
+    pdf.add_page()
+
+    # Title
+    pdf.set_font("Arial", "B", 16)
+    pdf.cell(200, 10, "Transcription Notes", ln=True, align="C")
+
+    # Transcription
+    pdf.set_font("Arial", "", 12)
+    pdf.multi_cell(0, 10, f"Transcription:\n{transcript}\n\n")
+
+    # Long Questions
+    pdf.set_font("Arial", "B", 14)
+    pdf.cell(200, 10, "Long Questions", ln=True)
+    pdf.set_font("Arial", "", 12)
+    for question in long_questions:
+        pdf.multi_cell(0, 10, f"- {question}\n")
+
+    # Short Questions
+    pdf.set_font("Arial", "B", 14)
+    pdf.cell(200, 10, "Short Questions", ln=True)
+    pdf.set_font("Arial", "", 12)
+    for question in short_questions:
+        pdf.multi_cell(0, 10, f"- {question}\n")
+
+    # MCQs
+    pdf.set_font("Arial", "B", 14)
+    pdf.cell(200, 10, "Multiple Choice Questions (MCQs)", ln=True)
+    pdf.set_font("Arial", "", 12)
+    for mcq in mcqs:
+        pdf.multi_cell(0, 10, f"Q: {mcq['question']}")
+        for option in mcq["options"]:
+            pdf.multi_cell(0, 10, f" - {option}")
+        pdf.multi_cell(0, 10, f"Answer: {mcq['answer']}\n")
+
+    # Save PDF
+    pdf_path = "/mnt/data/transcription_notes.pdf"
+    pdf.output(pdf_path)
+
+    return pdf_path
+
 # Gradio interface
 iface = gr.Interface(
     fn=transcribe,
-    inputs=gr.Audio(type="filepath"),  # Removed 'source' parameter for compatibility
-    outputs="text",
-    title="Voice to Text Converter App",
-
+    inputs=gr.Audio(type="filepath"),
+    outputs=gr.File(label="Download PDF with Notes and Questions"),
+    title="Voice to Text Converter and Notes Generator",
 )

-iface.launch()
+iface.launch()
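
For anyone building on this commit, the sketch below is not part of the diff; it shows one way the new pieces could be adapted for a Hugging Face Space. The GROQ_API_KEY environment variable and the save_notes_pdf helper are illustrative assumptions rather than code in this repository; the point is simply to avoid the hard-coded bearer token and the /mnt/data output path, which is typically not present or writable on a Space.

# Hypothetical hardening sketch, not part of commit 042bc75.
import os
import tempfile

import nltk
from fpdf import FPDF

# Same tokenizer data the commit downloads, fetched quietly at startup.
nltk.download("punkt", quiet=True)

# Assumption: the key is stored as a Space secret named GROQ_API_KEY.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
headers = {"Authorization": f"Bearer {GROQ_API_KEY}"}

def save_notes_pdf(text: str) -> str:
    """Write the generated notes into a PDF in the OS temp dir and return its path."""
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, text)
    # The OS temp directory is writable on a Space, unlike /mnt/data.
    pdf_path = os.path.join(tempfile.gettempdir(), "transcription_notes.pdf")
    pdf.output(pdf_path)
    return pdf_path

Returning such a path keeps the gr.File output working as-is, since Gradio only needs a readable file path in order to offer the download.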