Voice-To-Text

Sleeping

lodhrangpt committed on Nov 14, 2024

Commit

b4c43c2

verified ·

1 Parent(s): 6b80a5d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -17,6 +17,9 @@ except:
 # English stop-word set; the functions below test lower-cased tokens against
 # it when picking key sentences and question terms.
 stop_words = set(stopwords.words("english"))
 def transcribe(audio_path):
     with open(audio_path, "rb") as audio_file:
         audio_data = audio_file.read()
@@ -46,16 +49,19 @@ def transcribe(audio_path):
         return create_error_pdf(f"API Error: {error_msg}")
 def extract_key_sentences(transcript):
     # Split the transcript into sentences, keep those with at least one
     # token that is not in stop_words, and return the five longest.
-    sentences = sent_tokenize(transcript)
     important_sentences = [sentence for sentence in sentences if any(word.lower() not in stop_words for word in word_tokenize(sentence))]
-    top_sentences = sorted(important_sentences, key=lambda x: len(x), reverse=True)[:5]  # Longest sentences, assuming these might hold key details
     return top_sentences
 def generate_questions(sentences):
     long_questions = [f"Explain the importance of: '{sentence}'." for sentence in sentences]
     short_questions = [f"What does '{sentence.split()[0]}' refer to?" for sentence in sentences[:5]]
-    # Generate MCQs based on key terms
     mcqs = []
     for sentence in sentences[:5]:
         words = [word for word in word_tokenize(sentence) if word.isalpha() and word.lower() not in stop_words]

 # English stop-word set; the functions below test lower-cased tokens against
 # it when picking key sentences and question terms.
 stop_words = set(stopwords.words("english"))
def custom_sent_tokenize(text):
    """Split *text* into sentences without needing any NLTK data files.

    Dependency-free fallback used when NLTK's ``sent_tokenize`` raises
    ``LookupError``. A sentence boundary is any run of whitespace
    (including newlines) that follows ``.``, ``!`` or ``?``.

    Args:
        text: The text to split.

    Returns:
        A list of non-empty sentence strings with surrounding whitespace
        removed, each keeping its terminating punctuation. Empty or
        whitespace-only input yields ``[]``.
    """
    import re  # local import: the file's top-level import block is not visible here

    # The lookbehind keeps the terminator attached to its sentence instead
    # of discarding it, which a plain ``split(". ")`` would do.
    pieces = re.split(r"(?<=[.!?])\s+", text.strip())
    return [piece for piece in pieces if piece]
 def transcribe(audio_path):
     with open(audio_path, "rb") as audio_file:
         audio_data = audio_file.read()
         return create_error_pdf(f"API Error: {error_msg}")
 def extract_key_sentences(transcript):
     """Return up to five of the longest sentences that contain a content word.

     The transcript is split with NLTK's ``sent_tokenize``. If the tokenizer
     data is missing (``LookupError``), ``custom_sent_tokenize`` is used
     instead. A sentence is kept when at least one alphabetic token is not
     in ``stop_words``. Sentence length is used as a rough proxy for how
     much detail a sentence holds.

     Args:
         transcript: Transcript text to analyse.

     Returns:
         A list of at most five sentence strings, longest first.
     """
     try:
         sentences = sent_tokenize(transcript)
     except LookupError:
         sentences = custom_sent_tokenize(transcript)
         # word_tokenize sentence-splits internally unless preserve_line is
         # True, so without the tokenizer data it would raise the same
         # LookupError and make this fallback useless.
         keep_line = True
     else:
         keep_line = False

     important_sentences = []
     for sentence in sentences:
         tokens = word_tokenize(sentence, preserve_line=keep_line)
         # Require an alphabetic token: punctuation is never a stop word, so
         # without isalpha() every punctuated sentence would pass the filter.
         if any(tok.isalpha() and tok.lower() not in stop_words for tok in tokens):
             important_sentences.append(sentence)

     return sorted(important_sentences, key=len, reverse=True)[:5]
 def generate_questions(sentences):
     long_questions = [f"Explain the importance of: '{sentence}'." for sentence in sentences]
     short_questions = [f"What does '{sentence.split()[0]}' refer to?" for sentence in sentences[:5]]
     mcqs = []
     for sentence in sentences[:5]:
         words = [word for word in word_tokenize(sentence) if word.isalpha() and word.lower() not in stop_words]