MrSimple01 committed on
Commit
ed8d8f8
·
verified ·
1 Parent(s): a23b70d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -12,7 +12,7 @@ import gradio as gr
12
 
13
  tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
14
  sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
15
- max_tokens = 4000
16
 
17
  def clean_text(text):
18
  text = re.sub(r'\[speaker_\d+\]', '', text)
@@ -164,9 +164,9 @@ def analyze_segment_with_gemini(cluster_text, is_full_text=False):
164
  prompt = f"""
165
  Analyze the following text (likely a transcript or document) and:
166
 
167
- 1. First, identify distinct segments or topics within the text
168
  2. For each segment/topic you identify:
169
- - Provide a concise topic name (3-5 words)
170
  - List 3-5 key concepts discussed in that segment
171
  - Write a brief summary of that segment (3-5 sentences)
172
  - Create 5 quiz questions based DIRECTLY on the content in that segment
@@ -215,7 +215,7 @@ def analyze_segment_with_gemini(cluster_text, is_full_text=False):
215
  else:
216
  prompt = f"""
217
  Analyze the following text segment and provide:
218
- 1. A concise topic name (3-5 words)
219
  2. 3-5 key concepts discussed
220
  3. A brief summary (6-7 sentences)
221
  4. Create 5 quiz questions based DIRECTLY on the text content (not from your summary)
@@ -298,7 +298,7 @@ def process_document_with_quiz(text):
298
  token_count = len(tokenizer.encode(text))
299
  print(f"Text contains {token_count} tokens")
300
 
301
- if token_count < 12000:
302
  print("Text is short enough to analyze directly without text segmentation")
303
  full_analysis = analyze_segment_with_gemini(text, is_full_text=True)
304
 
 
12
 
13
  tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
14
  sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
15
+ max_tokens = 3000
16
 
17
  def clean_text(text):
18
  text = re.sub(r'\[speaker_\d+\]', '', text)
 
164
  prompt = f"""
165
  Analyze the following text (likely a transcript or document) and:
166
 
167
+ 1. First, do text segmentation and identify DISTINCT key topics within the text
168
  2. For each segment/topic you identify:
169
+ - Provide a SPECIFIC and UNIQUE topic name (3-5 words) that clearly differentiates it from other segments
170
  - List 3-5 key concepts discussed in that segment
171
  - Write a brief summary of that segment (3-5 sentences)
172
  - Create 5 quiz questions based DIRECTLY on the content in that segment
 
215
  else:
216
  prompt = f"""
217
  Analyze the following text segment and provide:
218
+ 1. A SPECIFIC and DESCRIPTIVE topic name (3-5 words) that precisely captures the main focus
219
  2. 3-5 key concepts discussed
220
  3. A brief summary (6-7 sentences)
221
  4. Create 5 quiz questions based DIRECTLY on the text content (not from your summary)
 
298
  token_count = len(tokenizer.encode(text))
299
  print(f"Text contains {token_count} tokens")
300
 
301
+ if token_count < 7000:
302
  print("Text is short enough to analyze directly without text segmentation")
303
  full_analysis = analyze_segment_with_gemini(text, is_full_text=True)
304