lodhrangpt committed
Commit 6b80a5d · verified · 1 Parent(s): 45f7b8d

Update app.py

Files changed (1)
  1. app.py +28 -16
app.py CHANGED
@@ -2,19 +2,20 @@ import gradio as gr
 import requests
 from fpdf import FPDF
 import nltk
-import os
 import tempfile
-from nltk.tokenize import sent_tokenize
+from nltk.tokenize import sent_tokenize, word_tokenize
+from nltk.corpus import stopwords
+from collections import Counter
 import random
 
-# Attempt to download punkt tokenizer
+# Download necessary NLTK data
 try:
     nltk.download("punkt")
+    nltk.download("stopwords")
 except:
-    print("NLTK punkt tokenizer download failed. Using custom tokenizer.")
+    print("NLTK data download failed.")
 
-def custom_sent_tokenize(text):
-    return text.split(". ")
+stop_words = set(stopwords.words("english"))
 
 def transcribe(audio_path):
     with open(audio_path, "rb") as audio_file:
@@ -44,24 +45,35 @@ def transcribe(audio_path):
         print(f"API Error: {error_msg}")
         return create_error_pdf(f"API Error: {error_msg}")
 
-def generate_notes(transcript):
-    try:
-        sentences = sent_tokenize(transcript)
-    except LookupError:
-        sentences = custom_sent_tokenize(transcript)
+def extract_key_sentences(transcript):
+    sentences = sent_tokenize(transcript)
+    important_sentences = [sentence for sentence in sentences if any(word.lower() not in stop_words for word in word_tokenize(sentence))]
+    top_sentences = sorted(important_sentences, key=lambda x: len(x), reverse=True)[:5]  # Longest sentences, assuming these might hold key details
+    return top_sentences
 
-    long_questions = [f"What is meant by '{sentence}'?" for sentence in sentences[:5]]
-    short_questions = [f"Define '{sentence.split()[0]}'." for sentence in sentences[:5]]
+def generate_questions(sentences):
+    long_questions = [f"Explain the importance of: '{sentence}'." for sentence in sentences]
+    short_questions = [f"What does '{sentence.split()[0]}' refer to?" for sentence in sentences[:5]]
 
+    # Generate MCQs based on key terms
     mcqs = []
     for sentence in sentences[:5]:
+        words = [word for word in word_tokenize(sentence) if word.isalpha() and word.lower() not in stop_words]
+        if not words:
+            continue
+        key_word = random.choice(words)
         mcq = {
-            "question": f"What is '{sentence.split()[0]}'?",
-            "options": [sentence.split()[0]] + random.sample(["Option 1", "Option 2", "Option 3"], 3),
-            "answer": sentence.split()[0]
+            "question": f"What is '{key_word}'?",
+            "options": [key_word] + random.sample(["Option A", "Option B", "Option C"], 3),
+            "answer": key_word
         }
         mcqs.append(mcq)
 
+    return long_questions, short_questions, mcqs
+
+def generate_notes(transcript):
+    key_sentences = extract_key_sentences(transcript)
+    long_questions, short_questions, mcqs = generate_questions(key_sentences)
     pdf_path = create_pdf(transcript, long_questions, short_questions, mcqs)
     return pdf_path
 
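For context, here is a minimal standalone sketch (not part of the commit) of the key-term question generation that the new code performs, assuming NLTK can fetch the punkt and stopwords data. The sample transcript, the quiet=True download flags, and the print-based output are illustrative assumptions; the sketch only mirrors the selection logic of extract_key_sentences and generate_questions, without the PDF step.

# Minimal sketch of the revised question-generation idea (illustrative only).
import random

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize

nltk.download("punkt", quiet=True)
nltk.download("stopwords", quiet=True)
stop_words = set(stopwords.words("english"))

# Hypothetical sample transcript, used only for illustration.
transcript = (
    "Photosynthesis converts light energy into chemical energy. "
    "Chlorophyll absorbs light inside the chloroplasts. "
    "The process releases oxygen as a by-product."
)

# Keep sentences containing at least one non-stopword, then take the longest five,
# mirroring extract_key_sentences.
sentences = [s for s in sent_tokenize(transcript)
             if any(w.lower() not in stop_words for w in word_tokenize(s))]
key_sentences = sorted(sentences, key=len, reverse=True)[:5]

for sentence in key_sentences:
    # Candidate key terms: alphabetic, non-stopword tokens, as in generate_questions.
    candidates = [w for w in word_tokenize(sentence) if w.isalpha() and w.lower() not in stop_words]
    if not candidates:
        continue
    key_word = random.choice(candidates)
    print(f"Long question: Explain the importance of: '{sentence}'.")
    print(f"MCQ stem: What is '{key_word}'?")

In the app itself, these questions are passed to create_pdf together with the full transcript, and each MCQ additionally carries an options list with the chosen key term as the answer.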