Update app.py
app.py
CHANGED
@@ -2,19 +2,20 @@ import gradio as gr
 import requests
 from fpdf import FPDF
 import nltk
-import os
 import tempfile
-from nltk.tokenize import sent_tokenize
+from nltk.tokenize import sent_tokenize, word_tokenize
+from nltk.corpus import stopwords
+from collections import Counter
 import random
 
-#
+# Download necessary NLTK data
 try:
     nltk.download("punkt")
+    nltk.download("stopwords")
 except:
-    print("NLTK
+    print("NLTK data download failed.")
 
-
-    return text.split(". ")
+stop_words = set(stopwords.words("english"))
 
 def transcribe(audio_path):
     with open(audio_path, "rb") as audio_file:
@@ -44,24 +45,35 @@ def transcribe(audio_path):
         print(f"API Error: {error_msg}")
         return create_error_pdf(f"API Error: {error_msg}")
 
-def
-
-
-
-
+def extract_key_sentences(transcript):
+    sentences = sent_tokenize(transcript)
+    important_sentences = [sentence for sentence in sentences if any(word.lower() not in stop_words for word in word_tokenize(sentence))]
+    top_sentences = sorted(important_sentences, key=lambda x: len(x), reverse=True)[:5]  # Longest sentences, assuming these might hold key details
+    return top_sentences
 
-
-
+def generate_questions(sentences):
+    long_questions = [f"Explain the importance of: '{sentence}'." for sentence in sentences]
+    short_questions = [f"What does '{sentence.split()[0]}' refer to?" for sentence in sentences[:5]]
 
+    # Generate MCQs based on key terms
     mcqs = []
     for sentence in sentences[:5]:
+        words = [word for word in word_tokenize(sentence) if word.isalpha() and word.lower() not in stop_words]
+        if not words:
+            continue
+        key_word = random.choice(words)
         mcq = {
-            "question": f"What is '{
-            "options": [
-            "answer":
+            "question": f"What is '{key_word}'?",
+            "options": [key_word] + random.sample(["Option A", "Option B", "Option C"], 3),
+            "answer": key_word
         }
         mcqs.append(mcq)
 
+    return long_questions, short_questions, mcqs
+
+def generate_notes(transcript):
+    key_sentences = extract_key_sentences(transcript)
+    long_questions, short_questions, mcqs = generate_questions(key_sentences)
     pdf_path = create_pdf(transcript, long_questions, short_questions, mcqs)
     return pdf_path
 
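For reference, a minimal smoke-test sketch (not part of the commit) of how the new helpers chain together when called directly; the sample transcript is invented for illustration, and in app.py these functions are reached through generate_notes and the rest of the app instead:

# Hypothetical usage of the helpers added in this commit (illustrative only).
# Assumes the module-level stop_words and the NLTK downloads above succeeded.
transcript = (
    "Photosynthesis converts light energy into chemical energy. "
    "Chlorophyll absorbs light most strongly in the blue and red bands. "
    "The light reactions produce ATP and NADPH for the Calvin cycle."
)
key_sentences = extract_key_sentences(transcript)            # up to 5 longest "important" sentences
long_qs, short_qs, mcqs = generate_questions(key_sentences)
print(long_qs[0])              # Explain the importance of: '...'.
print(mcqs[0]["question"])     # What is '<key_word>'?
print(mcqs[0]["options"])      # the answer plus three placeholder distractors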
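One design note on the MCQ construction: random.sample here only shuffles the three placeholder distractors, so the correct answer is always the first entry of "options". A small follow-up sketch (an assumption, not what the commit does) that hides the answer's position:

import random

def build_options(key_word):
    # Mix the answer in with the distractors so its index is unpredictable;
    # the distractor strings are the same placeholders used in the commit.
    options = [key_word, "Option A", "Option B", "Option C"]
    random.shuffle(options)  # shuffles in place
    return options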