lodhrangpt committed on
Commit
af44622
·
verified ·
1 Parent(s): ce01ec7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -68
app.py CHANGED
@@ -6,7 +6,6 @@ import os
6
  import tempfile
7
  from nltk.tokenize import sent_tokenize
8
  import random
9
- import re
10
 
11
  # Attempt to download punkt tokenizer
12
  try:
@@ -39,111 +38,68 @@ def transcribe(audio_path):
39
  if response.status_code == 200:
40
  result = response.json()
41
  transcript = result.get("text", "No transcription available.")
42
- return generate_exam_paper(transcript)
43
  else:
44
  error_msg = response.json().get("error", {}).get("message", "Unknown error.")
45
  print(f"API Error: {error_msg}")
46
  return create_error_pdf(f"API Error: {error_msg}")
47
 
48
- def generate_exam_paper(transcript):
49
  try:
50
  sentences = sent_tokenize(transcript)
51
  except LookupError:
52
  sentences = custom_sent_tokenize(transcript)
53
 
54
- # Extract important sentences for generating questions
55
- important_sentences = get_important_sentences(sentences)
56
-
57
- # Generate exam-like questions
58
- long_questions = generate_long_questions(important_sentences)
59
- short_questions = generate_short_questions(important_sentences)
60
- mcqs = generate_mcqs(important_sentences)
61
-
62
- # Ensure there are exactly 2 long questions, 5 short questions, and 7 MCQs
63
- long_questions = long_questions[:2] # Limit to 2 long questions
64
- short_questions = short_questions[:5] # Limit to 5 short questions
65
- mcqs = mcqs[:7] # Limit to 7 MCQs
66
-
67
- pdf_path = create_pdf(transcript, long_questions, short_questions, mcqs)
68
- return pdf_path
69
 
70
- def get_important_sentences(sentences):
71
- # Focus on sentences that are likely to contain key information (like facts or definitions)
72
- important_sentences = []
73
- for sentence in sentences:
74
- # Simplified heuristic: sentences with important nouns/verbs
75
- if len(re.findall(r'\b(NN|VB)\b', sentence)): # Using POS tags to detect nouns/verbs
76
- important_sentences.append(sentence)
77
- return important_sentences
78
-
79
- def generate_long_questions(important_sentences):
80
- long_questions = []
81
- for sentence in important_sentences[:2]: # Limit to 2 long questions
82
- long_questions.append(f"Explain the historical significance of '{sentence}'?")
83
- return long_questions
84
-
85
- def generate_short_questions(important_sentences):
86
- short_questions = []
87
- for sentence in important_sentences[:5]: # Limit to 5 short questions
88
- # Use the first word of the sentence to create short questions
89
- short_questions.append(f"What is the definition of '{sentence.split()[0]}'?")
90
- return short_questions
91
-
92
- def generate_mcqs(important_sentences):
93
  mcqs = []
94
- for sentence in important_sentences[:7]: # Limit to 7 MCQs
95
- # Generate MCQs from the sentence context
96
- key_terms = sentence.split() # Simple tokenization
97
- correct_answer = random.choice(key_terms) # Select a key term as the answer
98
- options = [correct_answer] + random.sample(key_terms, 3) # Select distractors from the sentence
99
- random.shuffle(options) # Shuffle the options
100
  mcq = {
101
- "question": f"What is '{correct_answer}' in the context of the sentence?",
102
- "options": options,
103
- "answer": correct_answer
104
  }
105
  mcqs.append(mcq)
106
- return mcqs
 
 
107
 
108
  def create_pdf(transcript, long_questions, short_questions, mcqs):
109
  pdf = FPDF()
110
  pdf.add_page()
111
-
112
  pdf.set_font("Arial", "B", 16)
113
- pdf.cell(200, 10, "Exam Paper: Transcription Notes", ln=True, align="C")
114
 
115
  pdf.set_font("Arial", "", 12)
116
  pdf.multi_cell(0, 10, f"Transcription:\n{transcript.encode('latin1', 'replace').decode('latin1')}\n\n")
117
 
118
- # Add Long Questions Section
119
  pdf.set_font("Arial", "B", 14)
120
  pdf.cell(200, 10, "Long Questions", ln=True)
121
  pdf.set_font("Arial", "", 12)
122
- for i, question in enumerate(long_questions, 1):
123
- pdf.multi_cell(0, 10, f"{i}. {question.encode('latin1', 'replace').decode('latin1')}\n")
124
 
125
- # Add Short Questions Section
126
  pdf.set_font("Arial", "B", 14)
127
  pdf.cell(200, 10, "Short Questions", ln=True)
128
  pdf.set_font("Arial", "", 12)
129
- for i, question in enumerate(short_questions, 1):
130
- pdf.multi_cell(0, 10, f"{i}. {question.encode('latin1', 'replace').decode('latin1')}\n")
131
 
132
- # Add MCQs Section
133
  pdf.set_font("Arial", "B", 14)
134
  pdf.cell(200, 10, "Multiple Choice Questions (MCQs)", ln=True)
135
  pdf.set_font("Arial", "", 12)
136
- for i, mcq in enumerate(mcqs, 1):
137
- pdf.multi_cell(0, 10, f"{i}. {mcq['question'].encode('latin1', 'replace').decode('latin1')}")
138
  for option in mcq["options"]:
139
  pdf.multi_cell(0, 10, f" - {option.encode('latin1', 'replace').decode('latin1')}")
140
  pdf.multi_cell(0, 10, f"Answer: {mcq['answer'].encode('latin1', 'replace').decode('latin1')}\n")
141
 
142
- # Save the generated PDF to a temporary file
143
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
144
  pdf.output(temp_pdf.name)
145
  pdf_path = temp_pdf.name
146
-
147
  return pdf_path
148
 
149
  def create_error_pdf(message):
@@ -153,18 +109,18 @@ def create_error_pdf(message):
153
  pdf.cell(200, 10, "Error Report", ln=True, align="C")
154
  pdf.set_font("Arial", "", 12)
155
  pdf.multi_cell(0, 10, message.encode('latin1', 'replace').decode('latin1'))
156
-
157
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
158
  pdf.output(temp_pdf.name)
159
  error_pdf_path = temp_pdf.name
160
-
161
  return error_pdf_path
162
 
163
  iface = gr.Interface(
164
  fn=transcribe,
165
  inputs=gr.Audio(type="filepath"),
166
- outputs=gr.File(label="Download Exam Paper (PDF)"),
167
- title="Voice to Text Converter and Exam Paper Generator",
168
  )
169
 
170
  iface.launch()
 
6
  import tempfile
7
  from nltk.tokenize import sent_tokenize
8
  import random
 
9
 
10
  # Attempt to download punkt tokenizer
11
  try:
 
38
  if response.status_code == 200:
39
  result = response.json()
40
  transcript = result.get("text", "No transcription available.")
41
+ return generate_notes(transcript)
42
  else:
43
  error_msg = response.json().get("error", {}).get("message", "Unknown error.")
44
  print(f"API Error: {error_msg}")
45
  return create_error_pdf(f"API Error: {error_msg}")
46
 
47
+ def generate_notes(transcript):
48
  try:
49
  sentences = sent_tokenize(transcript)
50
  except LookupError:
51
  sentences = custom_sent_tokenize(transcript)
52
 
53
+ long_questions = [f"What is meant by '{sentence}'?" for sentence in sentences[:5]]
54
+ short_questions = [f"Define '{sentence.split()[0]}'." for sentence in sentences[:5]]
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  mcqs = []
57
+ for sentence in sentences[:5]:
 
 
 
 
 
58
  mcq = {
59
+ "question": f"What is '{sentence.split()[0]}'?",
60
+ "options": [sentence.split()[0]] + random.sample(["Option 1", "Option 2", "Option 3"], 3),
61
+ "answer": sentence.split()[0]
62
  }
63
  mcqs.append(mcq)
64
+
65
+ pdf_path = create_pdf(transcript, long_questions, short_questions, mcqs)
66
+ return pdf_path
67
 
68
  def create_pdf(transcript, long_questions, short_questions, mcqs):
69
  pdf = FPDF()
70
  pdf.add_page()
71
+
72
  pdf.set_font("Arial", "B", 16)
73
+ pdf.cell(200, 10, "Transcription Notes", ln=True, align="C")
74
 
75
  pdf.set_font("Arial", "", 12)
76
  pdf.multi_cell(0, 10, f"Transcription:\n{transcript.encode('latin1', 'replace').decode('latin1')}\n\n")
77
 
 
78
  pdf.set_font("Arial", "B", 14)
79
  pdf.cell(200, 10, "Long Questions", ln=True)
80
  pdf.set_font("Arial", "", 12)
81
+ for question in long_questions:
82
+ pdf.multi_cell(0, 10, f"- {question.encode('latin1', 'replace').decode('latin1')}\n")
83
 
 
84
  pdf.set_font("Arial", "B", 14)
85
  pdf.cell(200, 10, "Short Questions", ln=True)
86
  pdf.set_font("Arial", "", 12)
87
+ for question in short_questions:
88
+ pdf.multi_cell(0, 10, f"- {question.encode('latin1', 'replace').decode('latin1')}\n")
89
 
 
90
  pdf.set_font("Arial", "B", 14)
91
  pdf.cell(200, 10, "Multiple Choice Questions (MCQs)", ln=True)
92
  pdf.set_font("Arial", "", 12)
93
+ for mcq in mcqs:
94
+ pdf.multi_cell(0, 10, f"Q: {mcq['question'].encode('latin1', 'replace').decode('latin1')}")
95
  for option in mcq["options"]:
96
  pdf.multi_cell(0, 10, f" - {option.encode('latin1', 'replace').decode('latin1')}")
97
  pdf.multi_cell(0, 10, f"Answer: {mcq['answer'].encode('latin1', 'replace').decode('latin1')}\n")
98
 
 
99
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
100
  pdf.output(temp_pdf.name)
101
  pdf_path = temp_pdf.name
102
+
103
  return pdf_path
104
 
105
  def create_error_pdf(message):
 
109
  pdf.cell(200, 10, "Error Report", ln=True, align="C")
110
  pdf.set_font("Arial", "", 12)
111
  pdf.multi_cell(0, 10, message.encode('latin1', 'replace').decode('latin1'))
112
+
113
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
114
  pdf.output(temp_pdf.name)
115
  error_pdf_path = temp_pdf.name
116
+
117
  return error_pdf_path
118
 
119
  iface = gr.Interface(
120
  fn=transcribe,
121
  inputs=gr.Audio(type="filepath"),
122
+ outputs=gr.File(label="Download PDF with Notes or Error Report"),
123
+ title="Voice to Text Converter and Notes Generator",
124
  )
125
 
126
  iface.launch()