MrSimple01 committed on
Commit
7e2e84e
·
verified ·
1 Parent(s): ed8d8f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -42
app.py CHANGED
@@ -164,12 +164,18 @@ def analyze_segment_with_gemini(cluster_text, is_full_text=False):
164
  prompt = f"""
165
  Analyze the following text (likely a transcript or document) and:
166
 
167
- 1. First, do text segmentation and identify DISTINCT key topics within the text
 
 
 
 
 
 
168
  2. For each segment/topic you identify:
169
  - Provide a SPECIFIC and UNIQUE topic name (3-5 words) that clearly differentiates it from other segments
170
  - List 3-5 key concepts discussed in that segment
171
  - Write a brief summary of that segment (3-5 sentences)
172
- - Create 5 quiz questions based DIRECTLY on the content in that segment
173
 
174
  For each quiz question:
175
  - Create one correct answer that comes DIRECTLY from the text
@@ -182,6 +188,7 @@ def analyze_segment_with_gemini(cluster_text, is_full_text=False):
182
 
183
  Format your response as JSON with the following structure:
184
  {{
 
185
  "segments": [
186
  {{
187
  "topic_name": "Name of segment 1",
@@ -211,14 +218,27 @@ def analyze_segment_with_gemini(cluster_text, is_full_text=False):
211
  // More segments...
212
  ]
213
  }}
 
 
 
 
 
 
214
  """
215
  else:
216
  prompt = f"""
217
- Analyze the following text segment and provide:
 
 
 
 
 
 
 
218
  1. A SPECIFIC and DESCRIPTIVE topic name (3-5 words) that precisely captures the main focus
219
  2. 3-5 key concepts discussed
220
  3. A brief summary (6-7 sentences)
221
- 4. Create 5 quiz questions based DIRECTLY on the text content (not from your summary)
222
 
223
  For each quiz question:
224
  - Create one correct answer that comes DIRECTLY from the text
@@ -231,6 +251,7 @@ def analyze_segment_with_gemini(cluster_text, is_full_text=False):
231
 
232
  Format your response as JSON with the following structure:
233
  {{
 
234
  "topic_name": "Name of the topic",
235
  "key_concepts": ["concept1", "concept2", "concept3"],
236
  "summary": "Brief summary of the text segment.",
@@ -255,50 +276,19 @@ def analyze_segment_with_gemini(cluster_text, is_full_text=False):
255
  // More questions...
256
  ]
257
  }}
 
 
 
 
 
 
258
  """
259
-
260
- response = llm.invoke(prompt)
261
-
262
- response_text = response.content
263
-
264
- try:
265
- json_match = re.search(r'\{[\s\S]*\}', response_text)
266
- if json_match:
267
- response_json = json.loads(json_match.group(0))
268
- else:
269
- response_json = json.loads(response_text)
270
-
271
- return response_json
272
- except json.JSONDecodeError as e:
273
- print(f"Error parsing JSON response: {e}")
274
- print(f"Raw response: {response_text}")
275
-
276
- if is_full_text:
277
- return {
278
- "segments": [
279
- {
280
- "topic_name": "JSON Parsing Error",
281
- "key_concepts": ["Error in response format"],
282
- "summary": f"Could not parse the API response. Raw text: {response_text[:200]}...",
283
- "quiz_questions": []
284
- }
285
- ]
286
- }
287
- else:
288
- return {
289
- "topic_name": "JSON Parsing Error",
290
- "key_concepts": ["Error in response format"],
291
- "summary": f"Could not parse the API response. Raw text: {response_text[:200]}...",
292
- "quiz_questions": []
293
- }
294
-
295
-
296
 
297
  def process_document_with_quiz(text):
298
  token_count = len(tokenizer.encode(text))
299
  print(f"Text contains {token_count} tokens")
300
 
301
- if token_count < 7000:
302
  print("Text is short enough to analyze directly without text segmentation")
303
  full_analysis = analyze_segment_with_gemini(text, is_full_text=True)
304
 
 
164
  prompt = f"""
165
  Analyze the following text (likely a transcript or document) and:
166
 
167
+ FIRST ASSESS THE TEXT:
168
+ - Check if it's primarily self-introduction, biographical information, or conclusion
169
+ - Check if it's too short or lacks meaningful content (less than 100 words of substance)
170
+ - If either case is true, respond with a simple JSON: {{"status": "insufficient", "reason": "Brief explanation"}}
171
+
172
+ IF THE TEXT HAS SUFFICIENT MEANINGFUL CONTENT:
173
+ 1. Identify DISTINCT key topics within the text
174
  2. For each segment/topic you identify:
175
  - Provide a SPECIFIC and UNIQUE topic name (3-5 words) that clearly differentiates it from other segments
176
  - List 3-5 key concepts discussed in that segment
177
  - Write a brief summary of that segment (3-5 sentences)
178
+ - Create 5 quiz questions based DIRECTLY on the content in that segment, ONLY if the segment contains factual information
179
 
180
  For each quiz question:
181
  - Create one correct answer that comes DIRECTLY from the text
 
188
 
189
  Format your response as JSON with the following structure:
190
  {{
191
+ "status": "processed",
192
  "segments": [
193
  {{
194
  "topic_name": "Name of segment 1",
 
218
  // More segments...
219
  ]
220
  }}
221
+
222
+ OR if the text is just introductory, concluding, or insufficient:
223
+ {{
224
+ "status": "insufficient",
225
+ "reason": "Brief explanation of why (e.g., 'Text is primarily self-introduction', 'Text is too short', etc.)"
226
+ }}
227
  """
228
  else:
229
  prompt = f"""
230
+ Analyze the following text segment and:
231
+
232
+ FIRST ASSESS THE TEXT:
233
+ - Check if it's primarily self-introduction, biographical information, or conclusion
234
+ - Check if it's too short or lacks meaningful content (less than 100 words of substance)
235
+ - If either case is true, respond with a simple JSON: {{"status": "insufficient", "reason": "Brief explanation"}}
236
+
237
+ IF THE TEXT HAS SUFFICIENT MEANINGFUL CONTENT:
238
  1. A SPECIFIC and DESCRIPTIVE topic name (3-5 words) that precisely captures the main focus
239
  2. 3-5 key concepts discussed
240
  3. A brief summary (6-7 sentences)
241
+ 4. Create 5 quiz questions based DIRECTLY on the text content (not from your summary), ONLY if the segment contains factual information
242
 
243
  For each quiz question:
244
  - Create one correct answer that comes DIRECTLY from the text
 
251
 
252
  Format your response as JSON with the following structure:
253
  {{
254
+ "status": "processed",
255
  "topic_name": "Name of the topic",
256
  "key_concepts": ["concept1", "concept2", "concept3"],
257
  "summary": "Brief summary of the text segment.",
 
276
  // More questions...
277
  ]
278
  }}
279
+
280
+ OR if the text is just introductory, concluding, or insufficient:
281
+ {{
282
+ "status": "insufficient",
283
+ "reason": "Brief explanation of why (e.g., 'Text is primarily self-introduction', 'Text is too short', etc.)"
284
+ }}
285
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
 
287
  def process_document_with_quiz(text):
288
  token_count = len(tokenizer.encode(text))
289
  print(f"Text contains {token_count} tokens")
290
 
291
+ if token_count < 8000:
292
  print("Text is short enough to analyze directly without text segmentation")
293
  full_analysis = analyze_segment_with_gemini(text, is_full_text=True)
294