Update app.py
app.py CHANGED
@@ -12,7 +12,7 @@ import gradio as gr
 
 tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
 sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
-max_tokens =
+max_tokens = 3000
 
 def clean_text(text):
     text = re.sub(r'\[speaker_\d+\]', '', text)
@@ -164,9 +164,9 @@ def analyze_segment_with_gemini(cluster_text, is_full_text=False):
         prompt = f"""
         Analyze the following text (likely a transcript or document) and:
 
-        1. First, identify
+        1. First, do text segmentation and identify DISTINCT key topics within the text
         2. For each segment/topic you identify:
-           - Provide a
+           - Provide a SPECIFIC and UNIQUE topic name (3-5 words) that clearly differentiates it from other segments
            - List 3-5 key concepts discussed in that segment
            - Write a brief summary of that segment (3-5 sentences)
            - Create 5 quiz questions based DIRECTLY on the content in that segment
@@ -215,7 +215,7 @@ def analyze_segment_with_gemini(cluster_text, is_full_text=False):
     else:
         prompt = f"""
         Analyze the following text segment and provide:
-        1. A
+        1. A SPECIFIC and DESCRIPTIVE topic name (3-5 words) that precisely captures the main focus
         2. 3-5 key concepts discussed
         3. A brief summary (6-7 sentences)
         4. Create 5 quiz questions based DIRECTLY on the text content (not from your summary)
@@ -298,7 +298,7 @@ def process_document_with_quiz(text):
     token_count = len(tokenizer.encode(text))
     print(f"Text contains {token_count} tokens")
 
-    if token_count <
+    if token_count < 7000:
         print("Text is short enough to analyze directly without text segmentation")
         full_analysis = analyze_segment_with_gemini(text, is_full_text=True)
 
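For reference, here is a minimal standalone sketch of the token-count gate this commit tunes (max_tokens = 3000, direct-analysis cutoff at 7000 tokens). It reuses the ModernBERT tokenizer from app.py, but the Gemini call is replaced by a dummy stub and the segmentation path for long documents is omitted; this is an illustration of the control flow, not the Space's actual implementation.

# Sketch of the token-count gate from process_document_with_quiz (not the full app.py).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
max_tokens = 3000  # per-chunk budget used by the segmentation path (not exercised below)

def analyze_segment_with_gemini(cluster_text, is_full_text=False):
    # Stand-in for the Space's Gemini call; returns a dummy result here.
    return {"is_full_text": is_full_text, "chars": len(cluster_text)}

def process_document_with_quiz(text):
    token_count = len(tokenizer.encode(text))
    print(f"Text contains {token_count} tokens")

    if token_count < 7000:
        # Short documents skip segmentation and get a single full-text analysis.
        print("Text is short enough to analyze directly without text segmentation")
        return analyze_segment_with_gemini(text, is_full_text=True)

    # Longer documents would be split into roughly max_tokens-sized chunks and
    # analyzed per segment (segmentation and clustering omitted in this sketch).
    raise NotImplementedError("segmentation path not shown")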