lodhrangpt committed on
Commit
8d9774d
·
verified ·
1 Parent(s): 78a2aef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -30
app.py CHANGED
@@ -23,7 +23,7 @@ def transcribe(audio_path):
23
 
24
  groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
25
  headers = {
26
- "Authorization": "Bearer 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", # Replace with your actual API key
27
  }
28
  files = {
29
  'file': ('audio.wav', audio_data, 'audio/wav'),
@@ -39,24 +39,24 @@ def transcribe(audio_path):
39
  if response.status_code == 200:
40
  result = response.json()
41
  transcript = result.get("text", "No transcription available.")
42
- return generate_notes(transcript)
43
  else:
44
  error_msg = response.json().get("error", {}).get("message", "Unknown error.")
45
  print(f"API Error: {error_msg}")
46
  return create_error_pdf(f"API Error: {error_msg}")
47
 
48
- def generate_notes(transcript):
49
  try:
50
  sentences = sent_tokenize(transcript)
51
  except LookupError:
52
  sentences = custom_sent_tokenize(transcript)
53
 
54
- # Extract key sentences for generating questions
55
  important_sentences = get_important_sentences(sentences)
56
 
57
- # Generate long questions, short questions, and MCQs
58
- long_questions = [f"What is meant by '{sentence}'?" for sentence in important_sentences[:5]]
59
- short_questions = [f"Define '{sentence.split()[0]}'." for sentence in important_sentences[:5]]
60
 
61
  mcqs = generate_mcqs(important_sentences)
62
 
@@ -64,10 +64,10 @@ def generate_notes(transcript):
64
  return pdf_path
65
 
66
  def get_important_sentences(sentences):
67
- # Prioritize sentences that contain nouns or verbs to be more relevant
68
  important_sentences = []
69
  for sentence in sentences:
70
- # Simple rule: sentences with nouns/verbs are considered important
71
  if len(re.findall(r'\b(NN|VB)\b', sentence)): # Using POS tags to detect nouns/verbs
72
  important_sentences.append(sentence)
73
  return important_sentences[:5] # Limit to top 5 important sentences
@@ -75,11 +75,11 @@ def get_important_sentences(sentences):
75
  def generate_mcqs(important_sentences):
76
  mcqs = []
77
  for sentence in important_sentences:
78
- # Generate MCQs from meaningful sentences
79
- key_terms = sentence.split() # Split sentence into words (simple tokenization)
80
- correct_answer = random.choice(key_terms) # Randomly select a key term from the sentence
81
- options = [correct_answer] + random.sample(key_terms, 3) # Create multiple choice options
82
- random.shuffle(options) # Shuffle options
83
  mcq = {
84
  "question": f"What is '{correct_answer}' in the context of the sentence?",
85
  "options": options,
@@ -91,41 +91,42 @@ def generate_mcqs(important_sentences):
91
  def create_pdf(transcript, long_questions, short_questions, mcqs):
92
  pdf = FPDF()
93
  pdf.add_page()
94
-
95
  pdf.set_font("Arial", "B", 16)
96
- pdf.cell(200, 10, "Transcription Notes", ln=True, align="C")
97
 
98
  pdf.set_font("Arial", "", 12)
99
  pdf.multi_cell(0, 10, f"Transcription:\n{transcript.encode('latin1', 'replace').decode('latin1')}\n\n")
100
 
101
- # Add long questions section
102
  pdf.set_font("Arial", "B", 14)
103
  pdf.cell(200, 10, "Long Questions", ln=True)
104
  pdf.set_font("Arial", "", 12)
105
- for question in long_questions:
106
- pdf.multi_cell(0, 10, f"- {question.encode('latin1', 'replace').decode('latin1')}\n")
107
 
108
- # Add short questions section
109
  pdf.set_font("Arial", "B", 14)
110
  pdf.cell(200, 10, "Short Questions", ln=True)
111
  pdf.set_font("Arial", "", 12)
112
- for question in short_questions:
113
- pdf.multi_cell(0, 10, f"- {question.encode('latin1', 'replace').decode('latin1')}\n")
114
 
115
- # Add MCQs section
116
  pdf.set_font("Arial", "B", 14)
117
  pdf.cell(200, 10, "Multiple Choice Questions (MCQs)", ln=True)
118
  pdf.set_font("Arial", "", 12)
119
- for mcq in mcqs:
120
- pdf.multi_cell(0, 10, f"Q: {mcq['question'].encode('latin1', 'replace').decode('latin1')}")
121
  for option in mcq["options"]:
122
  pdf.multi_cell(0, 10, f" - {option.encode('latin1', 'replace').decode('latin1')}")
123
  pdf.multi_cell(0, 10, f"Answer: {mcq['answer'].encode('latin1', 'replace').decode('latin1')}\n")
124
 
 
125
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
126
  pdf.output(temp_pdf.name)
127
  pdf_path = temp_pdf.name
128
-
129
  return pdf_path
130
 
131
  def create_error_pdf(message):
@@ -135,18 +136,18 @@ def create_error_pdf(message):
135
  pdf.cell(200, 10, "Error Report", ln=True, align="C")
136
  pdf.set_font("Arial", "", 12)
137
  pdf.multi_cell(0, 10, message.encode('latin1', 'replace').decode('latin1'))
138
-
139
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
140
  pdf.output(temp_pdf.name)
141
  error_pdf_path = temp_pdf.name
142
-
143
  return error_pdf_path
144
 
145
  iface = gr.Interface(
146
  fn=transcribe,
147
  inputs=gr.Audio(type="filepath"),
148
- outputs=gr.File(label="Download PDF with Notes or Error Report"),
149
- title="Voice to Text Converter and Notes Generator",
150
  )
151
 
152
  iface.launch()
 
23
 
24
  groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
25
  headers = {
26
+ "Authorization": "Bearer YOUR_API_KEY", # Replace with your actual API key
27
  }
28
  files = {
29
  'file': ('audio.wav', audio_data, 'audio/wav'),
 
39
  if response.status_code == 200:
40
  result = response.json()
41
  transcript = result.get("text", "No transcription available.")
42
+ return generate_exam_paper(transcript)
43
  else:
44
  error_msg = response.json().get("error", {}).get("message", "Unknown error.")
45
  print(f"API Error: {error_msg}")
46
  return create_error_pdf(f"API Error: {error_msg}")
47
 
48
+ def generate_exam_paper(transcript):
49
  try:
50
  sentences = sent_tokenize(transcript)
51
  except LookupError:
52
  sentences = custom_sent_tokenize(transcript)
53
 
54
+ # Extract important sentences for generating questions
55
  important_sentences = get_important_sentences(sentences)
56
 
57
+ # Generate exam-like questions
58
+ long_questions = [f"Explain the historical significance of '{sentence}'?" for sentence in important_sentences[:5]]
59
+ short_questions = [f"What is the definition of '{sentence.split()[0]}'?" for sentence in important_sentences[:5]]
60
 
61
  mcqs = generate_mcqs(important_sentences)
62
 
 
64
  return pdf_path
65
 
66
  def get_important_sentences(sentences):
67
+ # Focus on sentences that are likely to contain key information (like facts or definitions)
68
  important_sentences = []
69
  for sentence in sentences:
70
+ # Simplified heuristic: sentences with important nouns/verbs
71
  if len(re.findall(r'\b(NN|VB)\b', sentence)): # Using POS tags to detect nouns/verbs
72
  important_sentences.append(sentence)
73
  return important_sentences[:5] # Limit to top 5 important sentences
 
75
  def generate_mcqs(important_sentences):
76
  mcqs = []
77
  for sentence in important_sentences:
78
+ # Generate MCQs from the sentence context
79
+ key_terms = sentence.split() # Simple tokenization
80
+ correct_answer = random.choice(key_terms) # Select a key term as the answer
81
+ options = [correct_answer] + random.sample(key_terms, 3) # Select distractors from the sentence
82
+ random.shuffle(options) # Shuffle the options
83
  mcq = {
84
  "question": f"What is '{correct_answer}' in the context of the sentence?",
85
  "options": options,
 
91
  def create_pdf(transcript, long_questions, short_questions, mcqs):
92
  pdf = FPDF()
93
  pdf.add_page()
94
+
95
  pdf.set_font("Arial", "B", 16)
96
+ pdf.cell(200, 10, "Exam Paper: Transcription Notes", ln=True, align="C")
97
 
98
  pdf.set_font("Arial", "", 12)
99
  pdf.multi_cell(0, 10, f"Transcription:\n{transcript.encode('latin1', 'replace').decode('latin1')}\n\n")
100
 
101
+ # Add Long Questions Section
102
  pdf.set_font("Arial", "B", 14)
103
  pdf.cell(200, 10, "Long Questions", ln=True)
104
  pdf.set_font("Arial", "", 12)
105
+ for i, question in enumerate(long_questions, 1):
106
+ pdf.multi_cell(0, 10, f"{i}. {question.encode('latin1', 'replace').decode('latin1')}\n")
107
 
108
+ # Add Short Questions Section
109
  pdf.set_font("Arial", "B", 14)
110
  pdf.cell(200, 10, "Short Questions", ln=True)
111
  pdf.set_font("Arial", "", 12)
112
+ for i, question in enumerate(short_questions, 1):
113
+ pdf.multi_cell(0, 10, f"{i}. {question.encode('latin1', 'replace').decode('latin1')}\n")
114
 
115
+ # Add MCQs Section
116
  pdf.set_font("Arial", "B", 14)
117
  pdf.cell(200, 10, "Multiple Choice Questions (MCQs)", ln=True)
118
  pdf.set_font("Arial", "", 12)
119
+ for i, mcq in enumerate(mcqs, 1):
120
+ pdf.multi_cell(0, 10, f"{i}. {mcq['question'].encode('latin1', 'replace').decode('latin1')}")
121
  for option in mcq["options"]:
122
  pdf.multi_cell(0, 10, f" - {option.encode('latin1', 'replace').decode('latin1')}")
123
  pdf.multi_cell(0, 10, f"Answer: {mcq['answer'].encode('latin1', 'replace').decode('latin1')}\n")
124
 
125
+ # Save the generated PDF to a temporary file
126
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
127
  pdf.output(temp_pdf.name)
128
  pdf_path = temp_pdf.name
129
+
130
  return pdf_path
131
 
132
  def create_error_pdf(message):
 
136
  pdf.cell(200, 10, "Error Report", ln=True, align="C")
137
  pdf.set_font("Arial", "", 12)
138
  pdf.multi_cell(0, 10, message.encode('latin1', 'replace').decode('latin1'))
139
+
140
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
141
  pdf.output(temp_pdf.name)
142
  error_pdf_path = temp_pdf.name
143
+
144
  return error_pdf_path
145
 
146
  iface = gr.Interface(
147
  fn=transcribe,
148
  inputs=gr.Audio(type="filepath"),
149
+ outputs=gr.File(label="Download Exam Paper (PDF)"),
150
+ title="Voice to Text Converter and Exam Paper Generator",
151
  )
152
 
153
  iface.launch()