lodhrangpt committed on
Commit
78a2aef
·
verified ·
1 Parent(s): bcd1dcf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -10
app.py CHANGED
@@ -6,6 +6,7 @@ import os
6
  import tempfile
7
  from nltk.tokenize import sent_tokenize
8
  import random
 
9
 
10
  # Attempt to download punkt tokenizer
11
  try:
@@ -50,20 +51,42 @@ def generate_notes(transcript):
50
  except LookupError:
51
  sentences = custom_sent_tokenize(transcript)
52
 
53
- long_questions = [f"What is meant by '{sentence}'?" for sentence in sentences[:5]]
54
- short_questions = [f"Define '{sentence.split()[0]}'." for sentence in sentences[:5]]
 
 
 
 
 
 
 
 
 
55
 
 
 
 
 
 
 
 
 
 
 
56
  mcqs = []
57
- for sentence in sentences[:5]:
 
 
 
 
 
58
  mcq = {
59
- "question": f"What is '{sentence.split()[0]}'?",
60
- "options": [sentence.split()[0]] + random.sample(["Option 1", "Option 2", "Option 3"], 3),
61
- "answer": sentence.split()[0]
62
  }
63
  mcqs.append(mcq)
64
-
65
- pdf_path = create_pdf(transcript, long_questions, short_questions, mcqs)
66
- return pdf_path
67
 
68
  def create_pdf(transcript, long_questions, short_questions, mcqs):
69
  pdf = FPDF()
@@ -75,18 +98,21 @@ def create_pdf(transcript, long_questions, short_questions, mcqs):
75
  pdf.set_font("Arial", "", 12)
76
  pdf.multi_cell(0, 10, f"Transcription:\n{transcript.encode('latin1', 'replace').decode('latin1')}\n\n")
77
 
 
78
  pdf.set_font("Arial", "B", 14)
79
  pdf.cell(200, 10, "Long Questions", ln=True)
80
  pdf.set_font("Arial", "", 12)
81
  for question in long_questions:
82
  pdf.multi_cell(0, 10, f"- {question.encode('latin1', 'replace').decode('latin1')}\n")
83
 
 
84
  pdf.set_font("Arial", "B", 14)
85
  pdf.cell(200, 10, "Short Questions", ln=True)
86
  pdf.set_font("Arial", "", 12)
87
  for question in short_questions:
88
  pdf.multi_cell(0, 10, f"- {question.encode('latin1', 'replace').decode('latin1')}\n")
89
 
 
90
  pdf.set_font("Arial", "B", 14)
91
  pdf.cell(200, 10, "Multiple Choice Questions (MCQs)", ln=True)
92
  pdf.set_font("Arial", "", 12)
@@ -123,4 +149,4 @@ iface = gr.Interface(
123
  title="Voice to Text Converter and Notes Generator",
124
  )
125
 
126
- iface.launch()
 
6
  import tempfile
7
  from nltk.tokenize import sent_tokenize
8
  import random
9
+ import re
10
 
11
  # Attempt to download punkt tokenizer
12
  try:
 
51
  except LookupError:
52
  sentences = custom_sent_tokenize(transcript)
53
 
54
+ # Extract key sentences for generating questions
55
+ important_sentences = get_important_sentences(sentences)
56
+
57
+ # Generate long questions, short questions, and MCQs
58
+ long_questions = [f"What is meant by '{sentence}'?" for sentence in important_sentences[:5]]
59
+ short_questions = [f"Define '{sentence.split()[0]}'." for sentence in important_sentences[:5]]
60
+
61
+ mcqs = generate_mcqs(important_sentences)
62
+
63
+ pdf_path = create_pdf(transcript, long_questions, short_questions, mcqs)
64
+ return pdf_path
65
 
66
def get_important_sentences(sentences):
    """Return up to five sentences that contain a noun or a verb.

    The previous implementation searched the raw sentence text for the
    literal tokens "NN" / "VB", which are part-of-speech *tag names* and
    practically never occur in a transcript, so it returned an empty list
    for almost every input.

    Each sentence is split on whitespace and tagged with NLTK's
    ``pos_tag``; a sentence is kept when any tag starts with ``NN`` (noun)
    or ``VB`` (verb). If NLTK cannot be imported, or its tagger model is
    not installed (``LookupError``), a heuristic is used instead: a
    sentence is kept when at least one of its tokens contains an
    alphabetic character.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        A list with at most five of the input sentences, in their
        original order.
    """
    try:
        # Local import so this function is the only place that needs the tagger.
        from nltk import pos_tag
    except ImportError:
        pos_tag = None

    important_sentences = []
    for sentence in sentences:
        tokens = sentence.split()
        if not tokens:
            # Empty / whitespace-only sentences carry no content.
            continue

        has_content = None
        if pos_tag is not None:
            try:
                has_content = any(
                    tag.startswith(("NN", "VB")) for _, tag in pos_tag(tokens)
                )
            except LookupError:
                # Tagger model is not downloaded; use the heuristic from now on.
                pos_tag = None

        if has_content is None:
            has_content = any(
                any(ch.isalpha() for ch in token) for token in tokens
            )

        if has_content:
            important_sentences.append(sentence)
            if len(important_sentences) == 5:
                # Only the first five important sentences are ever used.
                break
    return important_sentences
74
+
75
def generate_mcqs(important_sentences):
    """Build one multiple-choice question per non-empty sentence.

    For each sentence a word is picked at random as the correct answer and
    up to three *other* distinct words from the same sentence are used as
    distractors. Sentences with no words are skipped, and short sentences
    simply yield fewer options, instead of raising ``IndexError`` /
    ``ValueError`` as ``random.choice`` / ``random.sample`` would.

    Args:
        important_sentences: Iterable of sentence strings.

    Returns:
        A list of dicts with the keys ``"question"`` (str), ``"options"``
        (shuffled list of unique words, including the answer) and
        ``"answer"`` (str).
    """
    mcqs = []
    for sentence in important_sentences:
        key_terms = sentence.split()  # simple whitespace tokenization
        if not key_terms:
            # Nothing to ask about in an empty sentence.
            continue

        correct_answer = random.choice(key_terms)

        # Distinct words other than the answer, in order of first appearance,
        # so the answer can never show up twice among the options.
        distractor_pool = list(
            dict.fromkeys(term for term in key_terms if term != correct_answer)
        )
        distractors = random.sample(
            distractor_pool, min(3, len(distractor_pool))
        )

        options = [correct_answer] + distractors
        random.shuffle(options)

        mcqs.append(
            {
                "question": f"What is '{correct_answer}' in the context of the sentence?",
                "options": options,
                "answer": correct_answer,
            }
        )
    return mcqs
 
 
90
 
91
  def create_pdf(transcript, long_questions, short_questions, mcqs):
92
  pdf = FPDF()
 
98
  pdf.set_font("Arial", "", 12)
99
  pdf.multi_cell(0, 10, f"Transcription:\n{transcript.encode('latin1', 'replace').decode('latin1')}\n\n")
100
 
101
+ # Add long questions section
102
  pdf.set_font("Arial", "B", 14)
103
  pdf.cell(200, 10, "Long Questions", ln=True)
104
  pdf.set_font("Arial", "", 12)
105
  for question in long_questions:
106
  pdf.multi_cell(0, 10, f"- {question.encode('latin1', 'replace').decode('latin1')}\n")
107
 
108
+ # Add short questions section
109
  pdf.set_font("Arial", "B", 14)
110
  pdf.cell(200, 10, "Short Questions", ln=True)
111
  pdf.set_font("Arial", "", 12)
112
  for question in short_questions:
113
  pdf.multi_cell(0, 10, f"- {question.encode('latin1', 'replace').decode('latin1')}\n")
114
 
115
+ # Add MCQs section
116
  pdf.set_font("Arial", "B", 14)
117
  pdf.cell(200, 10, "Multiple Choice Questions (MCQs)", ln=True)
118
  pdf.set_font("Arial", "", 12)
 
149
  title="Voice to Text Converter and Notes Generator",
150
  )
151
 
152
+ iface.launch()