lodhrangpt committed
Commit 6b80a5d · verified · 1 Parent(s): 45f7b8d

Update app.py

Files changed (1)
  1. app.py +28 -16
app.py CHANGED
@@ -2,19 +2,20 @@ import gradio as gr
 import requests
 from fpdf import FPDF
 import nltk
-import os
 import tempfile
-from nltk.tokenize import sent_tokenize
+from nltk.tokenize import sent_tokenize, word_tokenize
+from nltk.corpus import stopwords
+from collections import Counter
 import random
 
-# Attempt to download punkt tokenizer
+# Download necessary NLTK data
 try:
     nltk.download("punkt")
+    nltk.download("stopwords")
 except:
-    print("NLTK punkt tokenizer download failed. Using custom tokenizer.")
+    print("NLTK data download failed.")
 
-def custom_sent_tokenize(text):
-    return text.split(". ")
+stop_words = set(stopwords.words("english"))
 
 def transcribe(audio_path):
     with open(audio_path, "rb") as audio_file:
@@ -44,24 +45,35 @@ def transcribe(audio_path):
         print(f"API Error: {error_msg}")
         return create_error_pdf(f"API Error: {error_msg}")
 
-def generate_notes(transcript):
-    try:
-        sentences = sent_tokenize(transcript)
-    except LookupError:
-        sentences = custom_sent_tokenize(transcript)
+def extract_key_sentences(transcript):
+    sentences = sent_tokenize(transcript)
+    important_sentences = [sentence for sentence in sentences if any(word.lower() not in stop_words for word in word_tokenize(sentence))]
+    top_sentences = sorted(important_sentences, key=lambda x: len(x), reverse=True)[:5]  # Longest sentences, assuming these might hold key details
+    return top_sentences
 
-    long_questions = [f"What is meant by '{sentence}'?" for sentence in sentences[:5]]
-    short_questions = [f"Define '{sentence.split()[0]}'." for sentence in sentences[:5]]
+def generate_questions(sentences):
+    long_questions = [f"Explain the importance of: '{sentence}'." for sentence in sentences]
+    short_questions = [f"What does '{sentence.split()[0]}' refer to?" for sentence in sentences[:5]]
 
+    # Generate MCQs based on key terms
     mcqs = []
     for sentence in sentences[:5]:
+        words = [word for word in word_tokenize(sentence) if word.isalpha() and word.lower() not in stop_words]
+        if not words:
+            continue
+        key_word = random.choice(words)
         mcq = {
-            "question": f"What is '{sentence.split()[0]}'?",
-            "options": [sentence.split()[0]] + random.sample(["Option 1", "Option 2", "Option 3"], 3),
-            "answer": sentence.split()[0]
+            "question": f"What is '{key_word}'?",
+            "options": [key_word] + random.sample(["Option A", "Option B", "Option C"], 3),
+            "answer": key_word
         }
         mcqs.append(mcq)
 
+    return long_questions, short_questions, mcqs
+
+def generate_notes(transcript):
+    key_sentences = extract_key_sentences(transcript)
+    long_questions, short_questions, mcqs = generate_questions(key_sentences)
     pdf_path = create_pdf(transcript, long_questions, short_questions, mcqs)
     return pdf_path
 
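For context, here is a minimal standalone sketch (not part of the commit) of the key-term question generation that the new code performs, assuming NLTK can fetch the punkt and stopwords data. The sample transcript, the quiet=True download flags, and the print-based output are illustrative assumptions; the sketch only mirrors the selection logic of extract_key_sentences and generate_questions, without the PDF step.

# Minimal sketch of the revised question-generation idea (illustrative only).
import random

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize

nltk.download("punkt", quiet=True)
nltk.download("stopwords", quiet=True)
stop_words = set(stopwords.words("english"))

# Hypothetical sample transcript, used only for illustration.
transcript = (
    "Photosynthesis converts light energy into chemical energy. "
    "Chlorophyll absorbs light inside the chloroplasts. "
    "The process releases oxygen as a by-product."
)

# Keep sentences containing at least one non-stopword, then take the longest five,
# mirroring extract_key_sentences.
sentences = [s for s in sent_tokenize(transcript)
             if any(w.lower() not in stop_words for w in word_tokenize(s))]
key_sentences = sorted(sentences, key=len, reverse=True)[:5]

for sentence in key_sentences:
    # Candidate key terms: alphabetic, non-stopword tokens, as in generate_questions.
    candidates = [w for w in word_tokenize(sentence) if w.isalpha() and w.lower() not in stop_words]
    if not candidates:
        continue
    key_word = random.choice(candidates)
    print(f"Long question: Explain the importance of: '{sentence}'.")
    print(f"MCQ stem: What is '{key_word}'?")

In the app itself, these questions are passed to create_pdf together with the full transcript, and each MCQ additionally carries an options list with the chosen key term as the answer.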