lodhrangpt committed on
Commit
b4c43c2
·
verified ·
1 Parent(s): 6b80a5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -3
app.py CHANGED
@@ -17,6 +17,9 @@ except:
17
 
18
  stop_words = set(stopwords.words("english"))
19
 
 
 
 
20
  def transcribe(audio_path):
21
  with open(audio_path, "rb") as audio_file:
22
  audio_data = audio_file.read()
@@ -46,16 +49,19 @@ def transcribe(audio_path):
46
  return create_error_pdf(f"API Error: {error_msg}")
47
 
48
  def extract_key_sentences(transcript):
49
- sentences = sent_tokenize(transcript)
 
 
 
 
50
  important_sentences = [sentence for sentence in sentences if any(word.lower() not in stop_words for word in word_tokenize(sentence))]
51
- top_sentences = sorted(important_sentences, key=lambda x: len(x), reverse=True)[:5] # Longest sentences, assuming these might hold key details
52
  return top_sentences
53
 
54
  def generate_questions(sentences):
55
  long_questions = [f"Explain the importance of: '{sentence}'." for sentence in sentences]
56
  short_questions = [f"What does '{sentence.split()[0]}' refer to?" for sentence in sentences[:5]]
57
 
58
- # Generate MCQs based on key terms
59
  mcqs = []
60
  for sentence in sentences[:5]:
61
  words = [word for word in word_tokenize(sentence) if word.isalpha() and word.lower() not in stop_words]
 
17
 
18
  stop_words = set(stopwords.words("english"))
19
 
20
+ def custom_sent_tokenize(text):
21
+ return text.split(". ")
22
+
23
  def transcribe(audio_path):
24
  with open(audio_path, "rb") as audio_file:
25
  audio_data = audio_file.read()
 
49
  return create_error_pdf(f"API Error: {error_msg}")
50
 
51
  def extract_key_sentences(transcript):
52
+ try:
53
+ sentences = sent_tokenize(transcript)
54
+ except LookupError:
55
+ sentences = custom_sent_tokenize(transcript)
56
+
57
  important_sentences = [sentence for sentence in sentences if any(word.lower() not in stop_words for word in word_tokenize(sentence))]
58
+ top_sentences = sorted(important_sentences, key=lambda x: len(x), reverse=True)[:5]
59
  return top_sentences
60
 
61
  def generate_questions(sentences):
62
  long_questions = [f"Explain the importance of: '{sentence}'." for sentence in sentences]
63
  short_questions = [f"What does '{sentence.split()[0]}' refer to?" for sentence in sentences[:5]]
64
 
 
65
  mcqs = []
66
  for sentence in sentences[:5]:
67
  words = [word for word in word_tokenize(sentence) if word.isalpha() and word.lower() not in stop_words]