Dannyar608 committed on
Commit
b02a8be
·
verified ·
1 Parent(s): e0625a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -954
app.py CHANGED
@@ -41,8 +41,8 @@ logging.basicConfig(
41
  filename='transcript_parser.log'
42
  )
43
 
44
- # Model configuration - Only DeepSeek
45
- MODEL_NAME = "deepseek-ai/deepseek-llm-7b"
46
 
47
  # Initialize Hugging Face API
48
  if HF_TOKEN:
@@ -52,14 +52,6 @@ if HF_TOKEN:
52
  except Exception as e:
53
  logging.error(f"Failed to initialize Hugging Face API: {str(e)}")
54
 
55
- # ========== CACHING AND PERFORMANCE OPTIMIZATIONS ==========
56
- executor = ThreadPoolExecutor(max_workers=4)
57
-
58
- # Cache model loading
59
- @lru_cache(maxsize=1)
60
- def get_model_and_tokenizer():
61
- return model_loader.load_model()
62
-
63
  # ========== MODEL LOADER ==========
64
  class ModelLoader:
65
  def __init__(self):
@@ -76,7 +68,6 @@ class ModelLoader:
76
  if progress:
77
  progress(0.1, desc="Checking GPU availability...")
78
 
79
- # Clear CUDA cache first
80
  torch.cuda.empty_cache()
81
 
82
  if progress:
@@ -90,13 +81,12 @@ class ModelLoader:
90
  if progress:
91
  progress(0.5, desc="Loading model (this may take a few minutes)...")
92
 
93
- # More robust model loading
94
  model_kwargs = {
95
  "trust_remote_code": True,
96
  "torch_dtype": torch.float16 if self.device == "cuda" else torch.float32,
97
  "device_map": "auto" if self.device == "cuda" else None,
98
  "low_cpu_mem_usage": True,
99
- "offload_folder": "offload" # For handling large models
100
  }
101
 
102
  try:
@@ -105,7 +95,6 @@ class ModelLoader:
105
  **model_kwargs
106
  )
107
  except torch.cuda.OutOfMemoryError:
108
- # Fallback to CPU if GPU OOM
109
  model_kwargs["device_map"] = None
110
  model = AutoModelForCausalLM.from_pretrained(
111
  MODEL_NAME,
@@ -113,7 +102,6 @@ class ModelLoader:
113
  ).to('cpu')
114
  self.device = 'cpu'
115
 
116
- # Verify model is responsive
117
  test_input = tokenizer("Test", return_tensors="pt").to(self.device)
118
  _ = model.generate(**test_input, max_new_tokens=1)
119
 
@@ -131,29 +119,27 @@ class ModelLoader:
131
  # Initialize model loader
132
  model_loader = ModelLoader()
133
 
 
 
 
 
134
  # ========== UTILITY FUNCTIONS ==========
135
  def generate_session_token() -> str:
136
- """Generate a random session token for user identification."""
137
  alphabet = string.ascii_letters + string.digits
138
  return ''.join(secrets.choice(alphabet) for _ in range(SESSION_TOKEN_LENGTH))
139
 
140
  def sanitize_input(text: str) -> str:
141
- """Sanitize user input to prevent XSS and injection attacks."""
142
  if not text:
143
  return ""
144
- # Basic HTML escaping and removal of potentially dangerous characters
145
  text = html.escape(text.strip())
146
- # Remove any remaining HTML tags
147
  text = re.sub(r'<[^>]*>', '', text)
148
- # Remove potentially dangerous characters
149
  text = re.sub(r'[^\w\s\-.,!?@#\$%^&*()+=]', '', text)
150
  return text
151
 
152
  def validate_name(name: str) -> str:
153
- """Validate name input."""
154
  name = name.strip()
155
  if not name:
156
- raise ValueError("Name cannot be empty. Please enter your full name.")
157
  if len(name) > 100:
158
  raise ValueError("Name is too long (maximum 100 characters).")
159
  if any(c.isdigit() for c in name):
@@ -161,7 +147,6 @@ def validate_name(name: str) -> str:
161
  return name
162
 
163
  def validate_age(age: Union[int, float, str]) -> int:
164
- """Validate and convert age input."""
165
  try:
166
  age_int = int(age)
167
  if not MIN_AGE <= age_int <= MAX_AGE:
@@ -171,7 +156,6 @@ def validate_age(age: Union[int, float, str]) -> int:
171
  raise ValueError("Please enter a valid age number.")
172
 
173
  def validate_file(file_obj) -> None:
174
- """Validate uploaded file."""
175
  if not file_obj:
176
  raise ValueError("Please upload a file first")
177
 
@@ -179,24 +163,22 @@ def validate_file(file_obj) -> None:
179
  if file_ext not in ALLOWED_FILE_TYPES:
180
  raise ValueError(f"Invalid file type. Allowed types: {', '.join(ALLOWED_FILE_TYPES)}")
181
 
182
- file_size = os.path.getsize(file_obj.name) / (1024 * 1024) # MB
183
  if file_size > MAX_FILE_SIZE_MB:
184
  raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
185
 
186
  # ========== TEXT EXTRACTION FUNCTIONS ==========
187
  def extract_text_from_file(file_path: str, file_ext: str) -> str:
188
- """Enhanced text extraction with better error handling and fallbacks."""
189
  text = ""
190
 
191
  try:
192
  if file_ext == '.pdf':
193
- # First try PyMuPDF for text extraction
194
  try:
195
  doc = fitz.open(file_path)
196
  for page in doc:
197
  text += page.get_text("text") + '\n'
198
  if not text.strip():
199
- raise ValueError("PyMuPDF returned empty text - the PDF may be image-based")
200
  except Exception as e:
201
  logging.warning(f"PyMuPDF failed: {str(e)}. Trying OCR fallback...")
202
  text = extract_text_from_pdf_with_ocr(file_path)
@@ -204,56 +186,44 @@ def extract_text_from_file(file_path: str, file_ext: str) -> str:
204
  elif file_ext in ['.png', '.jpg', '.jpeg']:
205
  text = extract_text_with_ocr(file_path)
206
 
207
- # Clean up the extracted text
208
  text = clean_extracted_text(text)
209
 
210
  if not text.strip():
211
- raise ValueError("No text could be extracted. Please ensure the file is clear and readable.")
212
 
213
  return text
214
 
215
  except Exception as e:
216
  logging.error(f"Text extraction error: {str(e)}")
217
- raise gr.Error(f"Failed to extract text: {str(e)}\n\nTIPS:\n1. For PDFs, try saving as a different PDF format\n2. For images, ensure they are high-quality and well-lit\n3. Try cropping to just the transcript area")
218
 
219
  def extract_text_from_pdf_with_ocr(file_path: str) -> str:
220
- """Fallback PDF text extraction using OCR."""
221
  text = ""
222
  try:
223
  doc = fitz.open(file_path)
224
  for page in doc:
225
  pix = page.get_pixmap()
226
  img = Image.open(io.BytesIO(pix.tobytes()))
227
- # Preprocess image for better OCR
228
- img = img.convert('L') # Grayscale
229
- img = img.point(lambda x: 0 if x < 128 else 255) # Binarize
230
  text += pytesseract.image_to_string(img, config='--psm 6 --oem 3') + '\n'
231
  except Exception as e:
232
- raise ValueError(f"PDF OCR failed: {str(e)}. The PDF may be password protected or corrupted.")
233
  return text
234
 
235
  def extract_text_with_ocr(file_path: str) -> str:
236
- """Extract text from image files using OCR with preprocessing."""
237
  try:
238
  image = Image.open(file_path)
239
-
240
- # Enhanced preprocessing
241
- image = image.convert('L') # Convert to grayscale
242
- image = image.point(lambda x: 0 if x < 128 else 255, '1') # Thresholding
243
-
244
- # Custom Tesseract configuration
245
  custom_config = r'--oem 3 --psm 6'
246
  text = pytesseract.image_to_string(image, config=custom_config)
247
  return text
248
  except Exception as e:
249
- raise ValueError(f"OCR processing failed: {str(e)}. Please ensure the image is clear and not blurry.")
250
 
251
  def clean_extracted_text(text: str) -> str:
252
- """Clean and normalize the extracted text."""
253
- # Remove multiple spaces and newlines
254
  text = re.sub(r'\s+', ' ', text).strip()
255
-
256
- # Fix common OCR errors
257
  replacements = {
258
  '|': 'I',
259
  '‘': "'",
@@ -263,38 +233,16 @@ def clean_extracted_text(text: str) -> str:
263
  'fi': 'fi',
264
  'fl': 'fl'
265
  }
266
-
267
  for wrong, right in replacements.items():
268
  text = text.replace(wrong, right)
269
-
270
  return text
271
 
272
  def remove_sensitive_info(text: str) -> str:
273
- """Remove potentially sensitive information from transcript text."""
274
- # Remove social security numbers
275
  text = re.sub(r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED]', text)
276
- # Remove student IDs (assuming 6-9 digit numbers)
277
  text = re.sub(r'\b\d{6,9}\b', '[ID]', text)
278
- # Remove email addresses
279
  text = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z9.-]+\.[A-Z|a-z]{2,}\b', '[EMAIL]', text)
280
  return text
281
 
282
- def validate_parsed_data(data: Dict) -> bool:
283
- """Validate the structure of parsed transcript data"""
284
- required_student_fields = ['name', 'current_grade']
285
- required_course_fields = ['description', 'credits']
286
-
287
- if 'student_info' not in data:
288
- return False
289
- if not all(field in data['student_info'] for field in required_student_fields):
290
- return False
291
- if 'course_history' not in data or not isinstance(data['course_history'], list):
292
- return False
293
- if len(data['course_history']) > 0:
294
- if not all(field in data['course_history'][0] for field in required_course_fields):
295
- return False
296
- return True
297
-
298
  # ========== TRANSCRIPT PARSING ==========
299
  class TranscriptParser:
300
  def __init__(self):
@@ -305,353 +253,45 @@ class TranscriptParser:
305
  self.graduation_status = {}
306
 
307
  def parse_transcript(self, text: str) -> Dict:
308
- """Parse Miami-Dade formatted transcripts with updated regex patterns."""
309
  try:
310
- # First try structured parsing for Miami-Dade format
311
- if "Graduation Progress Summary" in text or "Miami-Dade" in text:
312
- return self._parse_miami_dade_format(text)
313
- else:
314
- # Fall back to AI parsing if not Miami-Dade format
315
- return parse_transcript_with_ai_fallback(text)
316
-
317
- except Exception as e:
318
- logging.error(f"Error parsing transcript: {str(e)}")
319
- raise ValueError(f"Couldn't parse transcript: {str(e)}")
320
-
321
- def _parse_miami_dade_format(self, text: str, strict_mode: bool = False) -> Dict:
322
- """Parse Miami-Dade County Public Schools transcripts."""
323
- # Initialize PDF reader from text (simulating the PDF structure)
324
- lines = [line.strip() for line in text.split('\n') if line.strip()]
325
-
326
- # Initialize data structure
327
- data = {
328
- 'student_info': {},
329
- 'graduation_requirements': [],
330
- 'course_history': [],
331
- 'summary': {},
332
- 'format': 'miami_dade' # Add format identifier
333
- }
334
-
335
- # Parse student information
336
- student_info_found = False
337
- for i, line in enumerate(lines):
338
- if "DORAL ACADEMY HIGH SCHOOL" in line:
339
- # School info line
340
- school_info = line.split('|')
341
- if len(school_info) > 1:
342
- data['student_info']['school'] = school_info[1].strip()
343
- data['student_info']['district'] = school_info[2].strip() if len(school_info) > 2 else ''
344
-
345
- # Student ID and name line
346
- if i+1 < len(lines):
347
- student_line = lines[i+1].split('-')
348
- if len(student_line) > 1:
349
- name_parts = student_line[1].split(',')
350
- if len(name_parts) > 1:
351
- data['student_info']['student_id'] = student_line[0].strip()
352
- data['student_info']['student_name'] = name_parts[1].strip() + " " + name_parts[0].strip()
353
-
354
- # Academic info line
355
- if i+2 < len(lines):
356
- academic_info = lines[i+2].split('|')
357
- if len(academic_info) > 1:
358
- data['student_info']['current_grade'] = academic_info[1].split(':')[1].strip() if ':' in academic_info[1] else academic_info[1].strip()
359
- if len(academic_info) > 2:
360
- data['student_info']['graduation_year'] = academic_info[2].strip()
361
- if len(academic_info) > 3:
362
- gpa_part = academic_info[3].strip()
363
- if 'Weighted GPA' in gpa_part:
364
- data['student_info']['weighted_gpa'] = gpa_part.split(':')[1].strip() if ':' in gpa_part else ''
365
- elif 'Un-weighted GPA' in gpa_part:
366
- data['student_info']['unweighted_gpa'] = gpa_part.split(':')[1].strip() if ':' in gpa_part else ''
367
- if len(academic_info) > 4:
368
- data['student_info']['community_service_date'] = academic_info[4].split(':')[1].strip() if ':' in academic_info[4] else ''
369
- if len(academic_info) > 5:
370
- data['student_info']['total_credits_earned'] = academic_info[5].split(':')[1].strip() if ':' in academic_info[5] else ''
371
-
372
- student_info_found = True
373
- break
374
-
375
- if not student_info_found and strict_mode:
376
- raise ValueError("Could not find student information section")
377
-
378
- # Parse graduation requirements
379
- requirements_start = None
380
- requirements_end = None
381
- for i, line in enumerate(lines):
382
- if "Code" in line and "Description" in line and "Required" in line:
383
- requirements_start = i + 1
384
- if requirements_start and "Total" in line:
385
- requirements_end = i
386
- break
387
-
388
- if requirements_start and requirements_end:
389
- for line in lines[requirements_start:requirements_end]:
390
- if '|' in line:
391
- parts = [p.strip() for p in line.split('|') if p.strip()]
392
- if len(parts) >= 6:
393
- req = {
394
- 'code': parts[0],
395
- 'description': parts[1],
396
- 'required': parts[2],
397
- 'waived': parts[3],
398
- 'completed': parts[4],
399
- 'status': parts[5]
400
- }
401
- data['graduation_requirements'].append(req)
402
 
403
- # Parse total line
404
- if requirements_end < len(lines):
405
- total_line = lines[requirements_end]
406
- total_parts = [p.strip() for p in total_line.split('|') if p.strip()]
407
- if len(total_parts) >= 5:
408
- data['summary']['total_required'] = total_parts[1]
409
- data['summary']['total_waived'] = total_parts[2]
410
- data['summary']['total_completed'] = total_parts[3]
411
- data['summary']['completion_percentage'] = total_parts[4]
412
-
413
- # Parse course history
414
- course_history_start = None
415
- for i, line in enumerate(lines):
416
- if "Requirement" in line and "School Year" in line and "GradeLv1" in line:
417
- course_history_start = i + 1
418
- break
419
-
420
- if course_history_start:
421
- current_requirement = None
422
- for line in lines[course_history_start:]:
423
- if '|' in line:
424
- parts = [p.strip() for p in line.split('|') if p.strip()]
425
-
426
- # Check if this is a new requirement line
427
- if len(parts) >= 2 and parts[0] and parts[0] in [req['code'] for req in data['graduation_requirements']]:
428
- current_requirement = parts[0]
429
- parts = parts[1:] # Remove the requirement code
430
-
431
- if len(parts) >= 9:
432
- course = {
433
- 'requirement': current_requirement,
434
- 'school_year': parts[0],
435
- 'grade_level': parts[1],
436
- 'course_number': parts[2],
437
- 'description': parts[3],
438
- 'term': parts[4],
439
- 'district_number': parts[5],
440
- 'fg': parts[6],
441
- 'included': parts[7],
442
- 'credits': parts[8]
443
- }
444
- data['course_history'].append(course)
445
-
446
- # Calculate graduation status
447
- try:
448
- if data['summary'].get('total_required') and data['summary'].get('total_completed'):
449
- graduation_status = {
450
- 'total_required_credits': float(data['summary']['total_required']),
451
- 'total_completed_credits': float(data['summary']['total_completed']),
452
- 'percent_complete': float(data['summary']['completion_percentage'].replace('%', '')),
453
- 'remaining_credits': float(data['summary']['total_required']) - float(data['summary']['total_completed']),
454
- 'on_track': float(data['summary']['completion_percentage'].replace('%', '')) >= 75.0
455
- }
456
- data['graduation_status'] = graduation_status
457
- except Exception as e:
458
- if strict_mode:
459
- raise ValueError(f"Error calculating graduation status: {str(e)}")
460
-
461
- return data
462
-
463
- def format_transcript_output(data: Dict) -> str:
464
- """Enhanced formatting for transcript output with format awareness"""
465
- output = []
466
-
467
- # Student Info Section
468
- student = data.get("student_info", {})
469
- output.append(f"## Student Transcript Summary\n{'='*50}")
470
- output.append(f"**Name:** {student.get('name', 'Unknown')}")
471
- output.append(f"**Student ID:** {student.get('id', 'Unknown')}")
472
- output.append(f"**Current Grade:** {student.get('current_grade', 'Unknown')}")
473
- output.append(f"**Graduation Year:** {student.get('graduation_year', 'Unknown')}")
474
-
475
- if 'unweighted_gpa' in student and 'weighted_gpa' in student:
476
- output.append(f"**Unweighted GPA:** {student['unweighted_gpa']}")
477
- output.append(f"**Weighted GPA:** {student['weighted_gpa']}")
478
- elif 'gpa' in student:
479
- output.append(f"**GPA:** {student['gpa']}")
480
-
481
- if 'total_credits' in student:
482
- output.append(f"**Total Credits Earned:** {student['total_credits']}")
483
- if 'community_service_hours' in student:
484
- output.append(f"**Community Service Hours:** {student['community_service_hours']}")
485
-
486
- output.append("")
487
-
488
- # Graduation Requirements Section (for Miami-Dade format)
489
- if data.get('format') == 'miami_dade':
490
- grad_status = data.get("graduation_status", {})
491
- output.append(f"## Graduation Progress\n{'='*50}")
492
- output.append(f"**Overall Completion:** {grad_status.get('percent_complete', 0)}%")
493
- output.append(f"**Credits Required:** {grad_status.get('total_required_credits', 0)}")
494
- output.append(f"**Credits Completed:** {grad_status.get('total_completed_credits', 0)}")
495
- output.append(f"**Credits Remaining:** {grad_status.get('remaining_credits', 0)}")
496
- output.append(f"**On Track to Graduate:** {'Yes' if grad_status.get('on_track', False) else 'No'}\n")
497
-
498
- # Detailed Requirements
499
- output.append("### Detailed Requirements:")
500
- for req in data.get("graduation_requirements", []):
501
- output.append(
502
- f"- **{req['code']}**: {req['description']}\n"
503
- f" Required: {req['required']} | Completed: {req['completed']} | "
504
- f"Status: {req['status']}"
505
- )
506
- output.append("")
507
-
508
- # Current Courses
509
- if any(c.get('credits', '') == 'inProgress' for c in data.get("course_history", [])):
510
- output.append("## Current Courses (In Progress)\n" + '='*50)
511
- for course in data["course_history"]:
512
- if course.get('credits', '') == 'inProgress':
513
- output.append(
514
- f"- **{course['course_number']} {course['description']}**\n"
515
- f" Category: {course['requirement']} | "
516
- f"Grade Level: {course['grade_level']} | "
517
- f"Term: {course['term']} | Credits: {course['credits']}"
518
- )
519
- output.append("")
520
-
521
- # Course History by Year
522
- courses_by_year = defaultdict(list)
523
- for course in data.get("course_history", []):
524
- if course.get("school_year"):
525
- courses_by_year[course["school_year"]].append(course)
526
-
527
- if courses_by_year:
528
- output.append("## Course History\n" + '='*50)
529
- for year in sorted(courses_by_year.keys()):
530
- output.append(f"\n### {year}")
531
- for course in courses_by_year[year]:
532
- output.append(
533
- f"- **{course.get('course_number', '')} {course.get('description', 'Unnamed course')}**\n"
534
- f" Subject: {course.get('requirement', 'N/A')} | "
535
- f"Grade: {course.get('fg', 'N/A')} | "
536
- f"Credits: {course.get('credits', 'N/A')}"
537
- )
538
-
539
- return '\n'.join(output)
540
-
541
- def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict:
542
- """More robust AI parsing with better error handling"""
543
- try:
544
- text = remove_sensitive_info(text[:20000]) # Increased limit
545
-
546
- # Improved prompt with examples
547
- prompt = f"""Extract academic transcript data as JSON. Follow this structure:
548
-
549
- Example Input:
550
- Student ID: 1234567 Name: DOE, JOHN Current Grade: 12 YOG: 2024
551
- Unweighted GPA: 3.5 Weighted GPA: 4.2 Total Credits: 24.5
552
-
553
- Example Output:
554
- {{
555
- "student_info": {{
556
- "name": "John Doe",
557
- "id": "1234567",
558
- "current_grade": "12",
559
- "graduation_year": "2024",
560
- "unweighted_gpa": 3.5,
561
- "weighted_gpa": 4.2,
562
- "total_credits": 24.5
563
- }},
564
- "course_history": [
565
- {{
566
- "course_code": "MATH101",
567
- "description": "Algebra I",
568
- "grade": "A",
569
- "credits": 1.0,
570
- "school_year": "2022-2023"
571
- }}
572
- ]
573
- }}
574
-
575
- Actual Transcript:
576
- {text}
577
- """
578
-
579
- if progress:
580
- progress(0.3, desc="Processing with AI...")
581
-
582
- model, tokenizer = get_model_and_tokenizer()
583
- if model is None:
584
- raise ValueError("Model not loaded")
585
 
586
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model_loader.device)
587
-
588
- outputs = model.generate(
589
- **inputs,
590
- max_new_tokens=2500,
591
- temperature=0.3, # Lower for more consistent results
592
- do_sample=True,
593
- top_p=0.9,
594
- repetition_penalty=1.2
595
- )
596
-
597
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
598
-
599
- # More robust JSON extraction
600
- try:
601
- if '```json' in response:
602
- json_str = response.split('```json')[1].split('```')[0].strip()
603
- else:
604
- json_str = response.split('{', 1)[1].rsplit('}', 1)[0]
605
- json_str = '{' + json_str + '}'
606
-
607
- parsed_data = json.loads(json_str)
608
 
609
- # Validate required fields
610
- if not all(k in parsed_data for k in ["student_info", "course_history"]):
611
- raise ValueError("Missing required fields in AI response")
612
-
613
- return parsed_data
614
 
615
- except Exception as e:
616
- logging.error(f"JSON parsing failed: {str(e)}")
617
- raise ValueError(f"AI returned invalid format. Please try again.")
 
 
 
 
 
 
 
618
 
619
- except Exception as e:
620
- logging.error(f"AI parsing error: {str(e)}")
621
- raise gr.Error(f"Failed to parse transcript: {str(e)}")
622
-
623
- def parse_transcript_with_ai(text: str, progress=gr.Progress()) -> Dict:
624
- """Enhanced AI parsing with fallback to structured parsing"""
625
- try:
626
- # First try structured parsing
627
- if progress:
628
- progress(0.1, desc="Attempting structured parsing...")
629
-
630
- parser = TranscriptParser()
631
- parsed_data = parser.parse_transcript(text)
632
-
633
- # Validate the parsed data
634
- if not validate_parsed_data(parsed_data):
635
- raise ValueError("Structured parsing returned incomplete data")
636
 
637
- if progress:
638
- progress(0.8, desc="Formatting results...")
639
-
640
- return parsed_data
641
-
642
- except Exception as e:
643
- logging.warning(f"Structured parsing failed, falling back to AI: {str(e)}")
644
-
645
- # Fall back to AI parsing if structured parsing fails
646
- return parse_transcript_with_ai_fallback(text, progress)
647
-
648
- async def parse_transcript_async(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
649
- """Async wrapper for transcript parsing"""
650
- loop = asyncio.get_event_loop()
651
- return await loop.run_in_executor(executor, parse_transcript, file_obj, progress)
652
 
653
  def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
654
- """Main function to parse transcript files with better error handling"""
655
  try:
656
  if not file_obj:
657
  raise ValueError("Please upload a file first")
@@ -659,46 +299,29 @@ def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Di
659
  validate_file(file_obj)
660
  file_ext = os.path.splitext(file_obj.name)[1].lower()
661
 
662
- # Extract text from file with better error reporting
663
  if progress:
664
  progress(0.2, desc="Extracting text from file...")
665
 
666
  text = extract_text_from_file(file_obj.name, file_ext)
667
 
668
  if not text.strip():
669
- raise ValueError("No text could be extracted from the file. The file may be corrupted or in an unsupported format.")
670
 
671
- # Try structured parsing first
672
  if progress:
673
- progress(0.4, desc="Attempting structured parsing...")
674
 
675
  parser = TranscriptParser()
676
- try:
677
- parsed_data = parser.parse_transcript(text)
678
- if validate_parsed_data(parsed_data):
679
- if progress:
680
- progress(0.9, desc="Formatting results...")
681
- return format_transcript_output(parsed_data), parsed_data
682
- except Exception as e:
683
- logging.warning(f"Structured parsing failed: {str(e)}")
684
 
685
- # Fall back to AI if structured fails
686
- if progress:
687
- progress(0.5, desc="Using AI analysis...")
688
-
689
- parsed_data = parse_transcript_with_ai_fallback(text, progress)
690
- return format_transcript_output(parsed_data), parsed_data
691
 
692
  except Exception as e:
693
  error_msg = f"Error processing transcript: {str(e)}"
694
- # Add specific troubleshooting tips
695
- if "PDF" in str(e):
696
- error_msg += "\n\nTIPS:\n1. Try converting to image (screenshot)\n2. Ensure text is selectable in PDF\n3. Try a different PDF reader"
697
- elif "image" in str(e).lower():
698
- error_msg += "\n\nTIPS:\n1. Use high contrast images\n2. Crop to just the transcript\n3. Ensure good lighting"
699
- elif "AI" in str(e):
700
- error_msg += "\n\nTIPS:\n1. Try a smaller section of the transcript\n2. Check for sensitive info that may be redacted\n3. Try again later"
701
-
702
  logging.error(error_msg)
703
  return error_msg, None
704
 
@@ -811,8 +434,8 @@ class LearningStyleQuiz:
811
  }
812
 
813
  def evaluate_quiz(self, *answers) -> str:
814
- """Evaluate quiz answers and generate enhanced results."""
815
- answers = list(answers) # Convert tuple to list
816
  if len(answers) != len(self.questions):
817
  raise gr.Error("Please answer all questions before submitting")
818
 
@@ -820,7 +443,7 @@ class LearningStyleQuiz:
820
 
821
  for i, answer in enumerate(answers):
822
  if not answer:
823
- continue # Skip unanswered questions
824
 
825
  for j, style in enumerate(self.learning_styles):
826
  if answer == self.options[i][j]:
@@ -834,7 +457,6 @@ class LearningStyleQuiz:
834
  percentages = {style: (score/total_answered)*100 for style, score in scores.items()}
835
  sorted_styles = sorted(scores.items(), key=lambda x: x[1], reverse=True)
836
 
837
- # Generate enhanced results report
838
  result = "## Your Learning Style Results\n\n"
839
  result += "### Scores:\n"
840
  for style, score in sorted_styles:
@@ -860,7 +482,6 @@ class LearningStyleQuiz:
860
  for career in style_info['careers'][:6]:
861
  result += f"- {career}\n"
862
 
863
- # Add complementary strategies
864
  complementary = [s for s in sorted_styles if s[0] != primary_style][0][0]
865
  result += f"\nYou might also benefit from some **{complementary}** strategies:\n"
866
  for tip in self.learning_styles[complementary]['tips'][:3]:
@@ -883,7 +504,6 @@ class LearningStyleQuiz:
883
 
884
  return result
885
 
886
- # Initialize quiz instance
887
  learning_style_quiz = LearningStyleQuiz()
888
 
889
  # ========== PROFILE MANAGEMENT ==========
@@ -894,13 +514,10 @@ class ProfileManager:
894
  self.current_session = None
895
 
896
  def set_session(self, session_token: str) -> None:
897
- """Set the current session token."""
898
  self.current_session = session_token
899
 
900
  def get_profile_path(self, name: str) -> Path:
901
- """Get profile path with session token if available."""
902
  if self.current_session:
903
- # Hash the name for security
904
  name_hash = hashlib.sha256(name.encode()).hexdigest()[:16]
905
  return self.profiles_dir / f"{name_hash}_{self.current_session}_profile.json"
906
  return self.profiles_dir / f"{name.replace(' ', '_')}_profile.json"
@@ -910,22 +527,9 @@ class ProfileManager:
910
  movie: str, movie_reason: str, show: str, show_reason: str,
911
  book: str, book_reason: str, character: str, character_reason: str,
912
  blog: str) -> str:
913
- """Save student profile with better validation messages"""
914
  try:
915
- # Validate required fields with specific messages
916
- if not name.strip():
917
- raise ValueError("Name cannot be empty. Please enter your full name.")
918
- if len(name) > 100:
919
- raise ValueError("Name is too long (maximum 100 characters).")
920
- if any(c.isdigit() for c in name):
921
- raise ValueError("Name cannot contain numbers.")
922
-
923
- try:
924
- age_int = int(age)
925
- if not MIN_AGE <= age_int <= MAX_AGE:
926
- raise ValueError(f"Age must be between {MIN_AGE} and {MAX_AGE}.")
927
- except (ValueError, TypeError):
928
- raise ValueError("Please enter a valid age number.")
929
 
930
  if not interests.strip():
931
  raise ValueError("Please describe at least one interest or hobby.")
@@ -933,11 +537,9 @@ class ProfileManager:
933
  if not transcript:
934
  raise ValueError("Please complete the transcript analysis first.")
935
 
936
- # Validate learning style quiz completion
937
  if not learning_style or "Your primary learning style is:" not in learning_style:
938
  raise ValueError("Please complete the learning style quiz first.")
939
 
940
- # Prepare favorites data
941
  favorites = {
942
  "movie": sanitize_input(movie),
943
  "movie_reason": sanitize_input(movie_reason),
@@ -949,26 +551,23 @@ class ProfileManager:
949
  "character_reason": sanitize_input(character_reason)
950
  }
951
 
952
- # Prepare full profile data
953
  data = {
954
  "name": name,
955
- "age": age_int,
956
  "interests": sanitize_input(interests),
957
- "transcript": transcript if transcript else {},
958
- "learning_style": learning_style if learning_style else "Not assessed",
959
  "favorites": favorites,
960
  "blog": sanitize_input(blog) if blog else "",
961
  "session_token": self.current_session,
962
  "last_updated": time.time()
963
  }
964
 
965
- # Save to JSON file
966
  filepath = self.get_profile_path(name)
967
 
968
  with open(filepath, "w", encoding='utf-8') as f:
969
  json.dump(data, f, indent=2, ensure_ascii=False)
970
 
971
- # Upload to HF Hub if token is available
972
  if HF_TOKEN and 'hf_api' in globals():
973
  try:
974
  hf_api.upload_file(
@@ -980,14 +579,17 @@ class ProfileManager:
980
  except Exception as e:
981
  logging.error(f"Failed to upload to HF Hub: {str(e)}")
982
 
983
- return self._generate_profile_summary(data)
 
 
 
 
984
 
985
  except Exception as e:
986
  logging.error(f"Profile validation error: {str(e)}")
987
  raise gr.Error(f"Couldn't save profile: {str(e)}")
988
-
989
  def load_profile(self, name: str = None, session_token: str = None) -> Dict:
990
- """Load profile by name or return the first one found."""
991
  try:
992
  if session_token:
993
  profile_pattern = f"*{session_token}_profile.json"
@@ -999,7 +601,6 @@ class ProfileManager:
999
  return {}
1000
 
1001
  if name:
1002
- # Find profile by name (hashed)
1003
  name_hash = hashlib.sha256(name.encode()).hexdigest()[:16]
1004
  if session_token:
1005
  profile_file = self.profiles_dir / f"{name_hash}_{session_token}_profile.json"
@@ -1007,7 +608,6 @@ class ProfileManager:
1007
  profile_file = self.profiles_dir / f"{name_hash}_profile.json"
1008
 
1009
  if not profile_file.exists():
1010
- # Try loading from HF Hub
1011
  if HF_TOKEN and 'hf_api' in globals():
1012
  try:
1013
  hf_api.download_file(
@@ -1021,12 +621,10 @@ class ProfileManager:
1021
  else:
1022
  raise gr.Error(f"No profile found for {name}")
1023
  else:
1024
- # Load the first profile found
1025
  profile_file = profiles[0]
1026
 
1027
  with open(profile_file, "r", encoding='utf-8') as f:
1028
  profile_data = json.load(f)
1029
- # Check session timeout
1030
  if time.time() - profile_data.get('last_updated', 0) > SESSION_TIMEOUT:
1031
  raise gr.Error("Session expired. Please start a new session.")
1032
  return profile_data
@@ -1036,13 +634,11 @@ class ProfileManager:
1036
  return {}
1037
 
1038
  def list_profiles(self, session_token: str = None) -> List[str]:
1039
- """List all available profile names for the current session."""
1040
  if session_token:
1041
  profiles = list(self.profiles_dir.glob(f"*{session_token}_profile.json"))
1042
  else:
1043
  profiles = list(self.profiles_dir.glob("*.json"))
1044
 
1045
- # Extract just the name part (without session token)
1046
  profile_names = []
1047
  for p in profiles:
1048
  with open(p, "r", encoding='utf-8') as f:
@@ -1053,336 +649,54 @@ class ProfileManager:
1053
  continue
1054
 
1055
  return profile_names
1056
-
1057
- def _generate_profile_summary(self, data: Dict) -> str:
1058
- """Generate markdown summary of the profile."""
1059
- transcript = data.get("transcript", {})
1060
- favorites = data.get("favorites", {})
1061
-
1062
- # Extract just the learning style name
1063
- learning_style = data.get("learning_style", "")
1064
- if "Your primary learning style is:" in learning_style:
1065
- style_match = re.search(r"Your primary learning style is: \*\*(.*?)\*\*", learning_style)
1066
- if style_match:
1067
- learning_style = style_match.group(1)
1068
-
1069
- markdown = f"""## Student Profile: {data['name']}
1070
- ### Basic Information
1071
- - **Age:** {data['age']}
1072
- - **Interests:** {data.get('interests', 'Not specified')}
1073
- - **Learning Style:** {learning_style}
1074
- ### Academic Information
1075
- {self._format_transcript(transcript)}
1076
- ### Favorites
1077
- - **Movie:** {favorites.get('movie', 'Not specified')}
1078
- *Reason:* {favorites.get('movie_reason', 'Not specified')}
1079
- - **TV Show:** {favorites.get('show', 'Not specified')}
1080
- *Reason:* {favorites.get('show_reason', 'Not specified')}
1081
- - **Book:** {favorites.get('book', 'Not specified')}
1082
- *Reason:* {favorites.get('book_reason', 'Not specified')}
1083
- - **Character:** {favorites.get('character', 'Not specified')}
1084
- *Reason:* {favorites.get('character_reason', 'Not specified')}
1085
- ### Personal Blog
1086
- {data.get('blog', '_No blog provided_')}
1087
- """
1088
- return markdown
1089
-
1090
- def _format_transcript(self, transcript: Dict) -> str:
1091
- """Format transcript data for display."""
1092
- if not transcript or "course_history" not in transcript:
1093
- return "_No transcript information available_"
1094
-
1095
- display = "#### Course History\n"
1096
- courses_by_year = defaultdict(list)
1097
- for course in transcript.get("course_history", []):
1098
- if course.get("school_year"):
1099
- courses_by_year[course["school_year"]].append(course)
1100
-
1101
- if courses_by_year:
1102
- for year in sorted(courses_by_year.keys()):
1103
- display += f"\n**{year}**\n"
1104
- for course in courses_by_year[year]:
1105
- display += f"- {course.get('course_code', '')} {course.get('description', 'Unnamed course')}"
1106
- if 'grade' in course and course['grade']:
1107
- display += f" (Grade: {course['grade']})"
1108
- if 'credits' in course:
1109
- display += f" | Credits: {course['credits']}"
1110
- display += f" | Category: {course.get('requirement_category', 'N/A')}\n"
1111
-
1112
- if 'student_info' in transcript:
1113
- student = transcript['student_info']
1114
- display += "\n**Academic Summary**\n"
1115
- display += f"- Unweighted GPA: {student.get('unweighted_gpa', 'N/A')}\n"
1116
- display += f"- Weighted GPA: {student.get('weighted_gpa', 'N/A')}\n"
1117
- display += f"- Total Credits: {student.get('total_credits', 'N/A')}\n"
1118
-
1119
- if 'graduation_status' in transcript:
1120
- status = transcript['graduation_status']
1121
- display += "\n**Graduation Progress**\n"
1122
- display += f"- Completion: {status.get('percent_complete', 0)}%\n"
1123
- display += f"- Credits Required: {status.get('total_required_credits', 0)}\n"
1124
- display += f"- Credits Completed: {status.get('total_completed_credits', 0)}\n"
1125
- display += f"- On Track: {'Yes' if status.get('on_track', False) else 'No'}\n"
1126
-
1127
- return display
1128
 
1129
- # Initialize profile manager
1130
  profile_manager = ProfileManager()
1131
 
1132
  # ========== AI TEACHING ASSISTANT ==========
1133
  class TeachingAssistant:
1134
  def __init__(self):
1135
  self.context_history = []
1136
- self.max_context_length = 5 # Keep last 5 exchanges for context
1137
 
1138
  async def generate_response(self, message: str, history: List[List[Union[str, None]]], session_token: str) -> str:
1139
- """Generate personalized response based on student profile and context."""
1140
  try:
1141
- # Load profile with session token
1142
  profile = profile_manager.load_profile(session_token=session_token)
1143
  if not profile:
1144
- return "Please complete and save your profile first using the previous tabs."
1145
 
1146
- # Update context history
1147
  self._update_context(message, history)
1148
 
1149
- # Extract profile information
1150
- name = profile.get("name", "there")
1151
- learning_style = profile.get("learning_style", "")
1152
- grade_level = profile.get("transcript", {}).get("student_info", {}).get("current_grade", "unknown")
1153
- gpa = profile.get("transcript", {}).get("student_info", {})
1154
- interests = profile.get("interests", "")
1155
- courses = profile.get("transcript", {}).get("course_history", [])
1156
- favorites = profile.get("favorites", {})
1157
-
1158
- # Process message with context
1159
- response = await self._process_message(message, profile)
1160
 
1161
- # Add follow-up suggestions
1162
- if "study" in message.lower() or "learn" in message.lower():
1163
- response += "\n\nWould you like me to suggest a study schedule based on your courses?"
1164
- elif "course" in message.lower() or "class" in message.lower():
1165
- response += "\n\nWould you like help finding resources for any of these courses?"
1166
-
1167
- return response
1168
 
1169
  except Exception as e:
1170
  logging.error(f"Error generating response: {str(e)}")
1171
- return "I encountered an error processing your request. Please try again."
1172
 
1173
  def _update_context(self, message: str, history: List[List[Union[str, None]]]) -> None:
1174
- """Maintain conversation context."""
1175
  self.context_history.append({"role": "user", "content": message})
1176
  if history:
1177
  for h in history[-self.max_context_length:]:
1178
- if h[0]: # User message
1179
  self.context_history.append({"role": "user", "content": h[0]})
1180
- if h[1]: # Assistant message
1181
  self.context_history.append({"role": "assistant", "content": h[1]})
1182
 
1183
- # Trim to maintain max context length
1184
  self.context_history = self.context_history[-(self.max_context_length*2):]
1185
-
1186
- async def _process_message(self, message: str, profile: Dict) -> str:
1187
- """Process user message with profile context."""
1188
- message_lower = message.lower()
1189
-
1190
- # Greetings
1191
- if any(greet in message_lower for greet in ["hi", "hello", "hey", "greetings"]):
1192
- return f"Hello {profile.get('name', 'there')}! How can I help you with your learning today?"
1193
-
1194
- # Study help
1195
- study_words = ["study", "learn", "prepare", "exam", "test", "homework"]
1196
- if any(word in message_lower for word in study_words):
1197
- return self._generate_study_advice(profile)
1198
-
1199
- # Grade help
1200
- grade_words = ["grade", "gpa", "score", "marks", "results"]
1201
- if any(word in message_lower for word in grade_words):
1202
- return self._generate_grade_advice(profile)
1203
-
1204
- # Interest help
1205
- interest_words = ["interest", "hobby", "passion", "extracurricular"]
1206
- if any(word in message_lower for word in interest_words):
1207
- return self._generate_interest_advice(profile)
1208
-
1209
- # Course help
1210
- course_words = ["courses", "classes", "transcript", "schedule", "subject"]
1211
- if any(word in message_lower for word in course_words):
1212
- return self._generate_course_advice(profile)
1213
-
1214
- # Favorites
1215
- favorite_words = ["movie", "show", "book", "character", "favorite"]
1216
- if any(word in message_lower for word in favorite_words):
1217
- return self._generate_favorites_response(profile)
1218
-
1219
- # General help
1220
- if "help" in message_lower:
1221
- return self._generate_help_response()
1222
-
1223
- # Default response
1224
- return ("I'm your personalized teaching assistant. I can help with study tips, "
1225
- "grade information, course advice, and more. Try asking about how to "
1226
- "study effectively or about your course history.")
1227
-
1228
- def _generate_study_advice(self, profile: Dict) -> str:
1229
- """Generate study advice based on learning style."""
1230
- learning_style = profile.get("learning_style", "")
1231
- response = ""
1232
-
1233
- if "Visual" in learning_style:
1234
- response = ("Based on your visual learning style, I recommend:\n"
1235
- "- Creating colorful mind maps or diagrams\n"
1236
- "- Using highlighters to color-code your notes\n"
1237
- "- Watching educational videos on the topics\n"
1238
- "- Creating flashcards with images\n\n")
1239
- elif "Auditory" in learning_style:
1240
- response = ("Based on your auditory learning style, I recommend:\n"
1241
- "- Recording your notes and listening to them\n"
1242
- "- Participating in study groups to discuss concepts\n"
1243
- "- Explaining the material out loud to yourself\n"
1244
- "- Finding podcasts or audio lectures on the topics\n\n")
1245
- elif "Reading/Writing" in learning_style:
1246
- response = ("Based on your reading/writing learning style, I recommend:\n"
1247
- "- Writing detailed summaries in your own words\n"
1248
- "- Creating organized outlines of the material\n"
1249
- "- Reading additional textbooks or articles\n"
1250
- "- Rewriting your notes to reinforce learning\n\n")
1251
- elif "Kinesthetic" in learning_style:
1252
- response = ("Based on your kinesthetic learning style, I recommend:\n"
1253
- "- Creating physical models or demonstrations\n"
1254
- "- Using hands-on activities to learn concepts\n"
1255
- "- Taking frequent movement breaks while studying\n"
1256
- "- Associating information with physical actions\n\n")
1257
- else:
1258
- response = ("Here are some general study tips:\n"
1259
- "- Use the Pomodoro technique (25 min study, 5 min break)\n"
1260
- "- Space out your study sessions over time\n"
1261
- "- Test yourself with practice questions\n"
1262
- "- Teach the material to someone else\n\n")
1263
-
1264
- # Add time management advice
1265
- response += ("**Time Management Tips**:\n"
1266
- "- Create a study schedule and stick to it\n"
1267
- "- Prioritize difficult subjects when you're most alert\n"
1268
- "- Break large tasks into smaller, manageable chunks\n"
1269
- "- Set specific goals for each study session")
1270
-
1271
- return response
1272
-
1273
- def _generate_grade_advice(self, profile: Dict) -> str:
1274
- """Generate response about grades and GPA."""
1275
- gpa = profile.get("transcript", {}).get("student_info", {})
1276
- courses = profile.get("transcript", {}).get("course_history", [])
1277
-
1278
- response = (f"Your GPA information:\n"
1279
- f"- Unweighted: {gpa.get('unweighted_gpa', 'N/A')}\n"
1280
- f"- Weighted: {gpa.get('weighted_gpa', 'N/A')}\n\n")
1281
-
1282
- # Identify any failing grades
1283
- weak_subjects = []
1284
- for course in courses:
1285
- if course.get('grade', '').upper() in ['D', 'F']:
1286
- weak_subjects.append(f"{course.get('course_code', '')} {course.get('description', 'Unknown course')}")
1287
-
1288
- if weak_subjects:
1289
- response += ("**Areas for Improvement**:\n"
1290
- f"You might want to focus on these subjects: {', '.join(weak_subjects)}\n\n")
1291
-
1292
- response += ("**Grade Improvement Strategies**:\n"
1293
- "- Meet with your teachers to discuss your performance\n"
1294
- "- Identify specific areas where you lost points\n"
1295
- "- Create a targeted study plan for weak areas\n"
1296
- "- Practice with past exams or sample questions")
1297
-
1298
- return response
1299
-
1300
- def _generate_interest_advice(self, profile: Dict) -> str:
1301
- """Generate response based on student interests."""
1302
- interests = profile.get("interests", "")
1303
- response = f"I see you're interested in: {interests}\n\n"
1304
-
1305
- response += ("**Suggestions**:\n"
1306
- "- Look for clubs or extracurricular activities related to these interests\n"
1307
- "- Explore career paths that align with these interests\n"
1308
- "- Find online communities or forums about these topics\n"
1309
- "- Consider projects or independent study in these areas")
1310
-
1311
- return response
1312
-
1313
- def _generate_course_advice(self, profile: Dict) -> str:
1314
- """Generate response about courses."""
1315
- courses = profile.get("transcript", {}).get("course_history", [])
1316
- grade_level = profile.get("transcript", {}).get("student_info", {}).get("current_grade", "unknown")
1317
-
1318
- response = "Here's a summary of your courses by year:\n"
1319
- courses_by_year = defaultdict(list)
1320
- for course in courses:
1321
- if course.get("school_year"):
1322
- courses_by_year[course["school_year"]].append(course)
1323
-
1324
- for year in sorted(courses_by_year.keys()):
1325
- response += f"\n**{year}**:\n"
1326
- for course in courses_by_year[year]:
1327
- response += f"- {course.get('course_code', '')} {course.get('description', 'Unnamed course')}"
1328
- if 'grade' in course:
1329
- response += f" (Grade: {course['grade']})"
1330
- response += "\n"
1331
-
1332
- response += f"\nAs a grade {grade_level} student, you might want to:\n"
1333
- if grade_level in ["9", "10"]:
1334
- response += ("- Focus on building strong foundational skills\n"
1335
- "- Explore different subjects to find your interests\n"
1336
- "- Start thinking about college/career requirements")
1337
- elif grade_level in ["11", "12"]:
1338
- response += ("- Focus on courses relevant to your college/career goals\n"
1339
- "- Consider taking AP or advanced courses if available\n"
1340
- "- Ensure you're meeting graduation requirements")
1341
-
1342
- return response
1343
-
1344
- def _generate_favorites_response(self, profile: Dict) -> str:
1345
- """Generate response about favorite items."""
1346
- favorites = profile.get("favorites", {})
1347
- response = "I see you enjoy:\n"
1348
-
1349
- if favorites.get('movie'):
1350
- response += f"- Movie: {favorites['movie']} ({favorites.get('movie_reason', 'no reason provided')})\n"
1351
- if favorites.get('show'):
1352
- response += f"- TV Show: {favorites['show']} ({favorites.get('show_reason', 'no reason provided')})\n"
1353
- if favorites.get('book'):
1354
- response += f"- Book: {favorites['book']} ({favorites.get('book_reason', 'no reason provided')})\n"
1355
- if favorites.get('character'):
1356
- response += f"- Character: {favorites['character']} ({favorites.get('character_reason', 'no reason provided')})\n"
1357
-
1358
- response += "\nThese preferences suggest you might enjoy:\n"
1359
- response += "- Similar books/movies in the same genre\n"
1360
- response += "- Creative projects related to these stories\n"
1361
- response += "- Analyzing themes or characters in your schoolwork"
1362
-
1363
- return response
1364
-
1365
- def _generate_help_response(self) -> str:
1366
- """Generate help response with available commands."""
1367
- return ("""I can help with:
1368
- - **Study tips**: "How should I study for math?"
1369
- - **Grade information**: "What's my GPA?"
1370
- - **Course advice**: "Show me my course history"
1371
- - **Interest suggestions**: "What clubs match my interests?"
1372
- - **General advice**: "How can I improve my grades?"
1373
- Try asking about any of these topics!""")
1374
 
1375
- # Initialize teaching assistant
1376
  teaching_assistant = TeachingAssistant()
1377
 
1378
  # ========== GRADIO INTERFACE ==========
1379
  def create_interface():
1380
  with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
1381
- # Session state
1382
  session_token = gr.State(value=generate_session_token())
1383
  profile_manager.set_session(session_token.value)
1384
 
1385
- # Track completion status for each tab
1386
  tab_completed = gr.State({
1387
  0: False, # Transcript Upload
1388
  1: False, # Learning Style Quiz
@@ -1391,7 +705,7 @@ def create_interface():
1391
  4: False # AI Assistant
1392
  })
1393
 
1394
- # Custom CSS with dark mode support
1395
  app.css = """
1396
  .gradio-container { max-width: 1200px !important; margin: 0 auto !important; }
1397
  .tab-content { padding: 20px !important; border: 1px solid #e0e0e0 !important; border-radius: 8px !important; margin-top: 10px !important; }
@@ -1404,7 +718,6 @@ def create_interface():
1404
  .quiz-results { margin-top: 20px; padding: 20px; background: #e8f5e9; border-radius: 8px; }
1405
  .error-message { color: #d32f2f; background-color: #ffebee; padding: 10px; border-radius: 4px; margin: 10px 0; }
1406
 
1407
- /* Dark mode support */
1408
  .dark .tab-content { background-color: #2d2d2d !important; border-color: #444 !important; }
1409
  .dark .quiz-question { background-color: #3d3d3d !important; }
1410
  .dark .quiz-results { background-color: #2e3d2e !important; }
@@ -1414,7 +727,7 @@ def create_interface():
1414
  .dark .chatbot .user, .dark .chatbot .assistant { color: #eee !important; }
1415
  """
1416
 
1417
- # Header with dark mode toggle
1418
  with gr.Row():
1419
  with gr.Column(scale=4):
1420
  gr.Markdown("""
@@ -1440,7 +753,7 @@ def create_interface():
1440
 
1441
  nav_message = gr.HTML(visible=False)
1442
 
1443
- # Main tabs container - Now VISIBLE
1444
  with gr.Tabs(visible=True) as tabs:
1445
  # ===== TAB 1: TRANSCRIPT UPLOAD =====
1446
  with gr.Tab("Transcript", id=0):
@@ -1459,59 +772,25 @@ def create_interface():
1459
  with gr.Column(scale=2):
1460
  transcript_output = gr.Textbox(
1461
  label="Analysis Results",
1462
- lines=20,
1463
  interactive=False
1464
  )
1465
  transcript_data = gr.State()
1466
 
1467
- def process_transcript(file_obj, current_tab_status):
1468
- try:
1469
- if not file_obj:
1470
- raise ValueError("Please upload a transcript file first.")
1471
-
1472
- output_text, data = parse_transcript(file_obj)
1473
-
1474
- if "Error" in output_text:
1475
- return (
1476
- output_text,
1477
- None,
1478
- current_tab_status,
1479
- gr.update(),
1480
- gr.update(),
1481
- gr.update(visible=True, value=f"<div class='error-message'>{output_text}</div>"),
1482
- gr.update(visible=False)
1483
- )
1484
-
1485
- new_status = current_tab_status.copy()
1486
- new_status[0] = True
1487
- return (
1488
- output_text,
1489
- data,
1490
- new_status,
1491
- gr.update(elem_classes="completed-tab"),
1492
- gr.update(interactive=True),
1493
- gr.update(visible=False),
1494
- gr.update(visible=False)
1495
- )
1496
-
1497
- except Exception as e:
1498
- error_msg = f"Error processing transcript: {str(e)}"
1499
- if "PDF" in str(e):
1500
- error_msg += "\n\nTIPS:\n- Try converting to image (screenshot)\n- Ensure text is selectable in PDF\n- Try a different PDF reader"
1501
- return (
1502
- error_msg,
1503
- None,
1504
- current_tab_status,
1505
- gr.update(),
1506
- gr.update(),
1507
- gr.update(visible=True, value=f"<div class='error-message'>{error_msg}</div>"),
1508
- gr.update(visible=False)
1509
- )
1510
-
1511
  upload_btn.click(
1512
- process_transcript,
1513
  inputs=[file_input, tab_completed],
1514
- outputs=[transcript_output, transcript_data, tab_completed, step1, step2, file_error, nav_message]
 
 
 
 
 
 
 
 
 
 
1515
  )
1516
 
1517
  # ===== TAB 2: LEARNING STYLE QUIZ =====
@@ -1542,7 +821,6 @@ def create_interface():
1542
  elem_classes="quiz-results"
1543
  )
1544
 
1545
- # Update progress bar as questions are answered
1546
  for component in quiz_components:
1547
  component.change(
1548
  fn=lambda *answers: {
@@ -1554,38 +832,23 @@ def create_interface():
1554
  outputs=progress
1555
  )
1556
 
1557
- def submit_quiz_and_update(*args):
1558
- current_tab_status = args[0]
1559
- answers = args[1:]
1560
-
1561
- try:
1562
- result = learning_style_quiz.evaluate_quiz(*answers)
1563
- new_status = current_tab_status.copy()
1564
- new_status[1] = True
1565
- return (
1566
- result,
1567
- gr.update(visible=True),
1568
- new_status,
1569
- gr.update(elem_classes="completed-tab"),
1570
- gr.update(interactive=True),
1571
- gr.update(value="<div class='alert-box'>Quiz submitted successfully!</div>", visible=True),
1572
- gr.update(visible=False)
1573
- )
1574
- except Exception as e:
1575
- return (
1576
- f"Error evaluating quiz: {str(e)}",
1577
- gr.update(visible=True),
1578
- current_tab_status,
1579
- gr.update(),
1580
- gr.update(),
1581
- gr.update(value=f"<div class='error-message'>Error: {str(e)}</div>", visible=True),
1582
- gr.update(visible=False)
1583
- )
1584
-
1585
  quiz_submit.click(
1586
- fn=submit_quiz_and_update,
1587
- inputs=[tab_completed] + quiz_components,
1588
- outputs=[learning_output, learning_output, tab_completed, step2, step3, quiz_alert, nav_message]
 
 
 
 
 
 
 
 
 
 
 
 
 
1589
  )
1590
 
1591
  quiz_clear.click(
@@ -1624,42 +887,22 @@ def create_interface():
1624
  character = gr.Textbox(label="Favorite Character (from any story)")
1625
  character_reason = gr.Textbox(label="Why do you like them?", lines=2)
1626
 
1627
- # Added blog section
1628
  with gr.Accordion("Personal Blog (Optional)", open=False):
1629
  blog = gr.Textbox(
1630
  label="Share your thoughts",
1631
- placeholder="Write something about yourself, your goals, or anything you'd like to share...",
1632
  lines=5
1633
  )
1634
 
1635
- def save_personal_info(name, age, interests, current_tab_status):
1636
- try:
1637
- name = validate_name(name)
1638
- age = validate_age(age)
1639
- interests = sanitize_input(interests)
1640
-
1641
- new_status = current_tab_status.copy()
1642
- new_status[2] = True
1643
- return (
1644
- new_status,
1645
- gr.update(elem_classes="completed-tab"),
1646
- gr.update(interactive=True),
1647
- gr.update(value="<div class='alert-box'>Information saved!</div>", visible=True),
1648
- gr.update(visible=False)
1649
- )
1650
- except Exception as e:
1651
- return (
1652
- current_tab_status,
1653
- gr.update(),
1654
- gr.update(),
1655
- gr.update(visible=False),
1656
- gr.update(visible=True, value=f"<div class='error-message'>Error: {str(e)}</div>")
1657
- )
1658
-
1659
  save_personal_btn.click(
1660
- fn=save_personal_info,
 
 
 
 
 
1661
  inputs=[name, age, interests, tab_completed],
1662
- outputs=[tab_completed, step3, step4, save_confirmation, nav_message]
1663
  )
1664
 
1665
  # ===== TAB 4: SAVE & REVIEW =====
@@ -1686,69 +929,24 @@ def create_interface():
1686
  label="Profile Summary"
1687
  )
1688
 
1689
- def save_profile_and_update(name, age, interests, transcript_data, learning_style,
1690
- movie, movie_reason, show, show_reason,
1691
- book, book_reason, character, character_reason, blog,
1692
- current_tab_status):
1693
- try:
1694
- summary = profile_manager.save_profile(
1695
- name, age, interests, transcript_data, learning_style,
1696
- movie, movie_reason, show, show_reason,
1697
- book, book_reason, character, character_reason, blog
1698
- )
1699
- new_status = current_tab_status.copy()
1700
- new_status[3] = True
1701
- return (
1702
- summary,
1703
- new_status,
1704
- gr.update(elem_classes="completed-tab"),
1705
- gr.update(interactive=True),
1706
- gr.update(visible=False)
1707
- )
1708
- except Exception as e:
1709
- return (
1710
- f"Error saving profile: {str(e)}",
1711
- current_tab_status,
1712
- gr.update(),
1713
- gr.update(),
1714
- gr.update(visible=True, value=f"<div class='error-message'>Error: {str(e)}</div>")
1715
- )
1716
-
1717
  save_btn.click(
1718
- fn=save_profile_and_update,
1719
  inputs=[
1720
  name, age, interests, transcript_data, learning_output,
1721
  movie, movie_reason, show, show_reason,
1722
- book, book_reason, character, character_reason, blog,
1723
- tab_completed
1724
  ],
1725
- outputs=[output_summary, tab_completed, step4, step5, nav_message]
1726
  ).then(
1727
- fn=lambda: profile_manager.list_profiles(session_token.value),
1728
- outputs=load_profile_dropdown
 
1729
  ).then(
1730
- fn=lambda: gr.update(visible=bool(profile_manager.list_profiles(session_token.value))),
1731
- outputs=load_btn
1732
  ).then(
1733
- fn=lambda: gr.update(visible=bool(profile_manager.list_profiles(session_token.value))),
1734
- outputs=delete_btn
1735
- )
1736
-
1737
- def delete_profile(name, session_token):
1738
- if not name:
1739
- raise gr.Error("Please select a profile to delete")
1740
- try:
1741
- profile_path = profile_manager.get_profile_path(name)
1742
- if profile_path.exists():
1743
- profile_path.unlink()
1744
- return "Profile deleted successfully", ""
1745
- except Exception as e:
1746
- raise gr.Error(f"Error deleting profile: {str(e)}")
1747
-
1748
- delete_btn.click(
1749
- fn=delete_profile,
1750
- inputs=[load_profile_dropdown, session_token],
1751
- outputs=[output_summary, load_profile_dropdown]
1752
  ).then(
1753
  fn=lambda: profile_manager.list_profiles(session_token.value),
1754
  outputs=load_profile_dropdown
@@ -1759,23 +957,12 @@ def create_interface():
1759
  fn=lambda: gr.update(visible=bool(profile_manager.list_profiles(session_token.value))),
1760
  outputs=delete_btn
1761
  )
1762
-
1763
- clear_btn.click(
1764
- fn=lambda: [gr.update(value="") for _ in range(12)],
1765
- outputs=[
1766
- name, age, interests,
1767
- movie, movie_reason, show, show_reason,
1768
- book, book_reason, character, character_reason,
1769
- output_summary
1770
- ]
1771
- )
1772
 
1773
  # ===== TAB 5: AI ASSISTANT =====
1774
  with gr.Tab("AI Assistant", id=4):
1775
  gr.Markdown("## Your Personalized Learning Assistant")
1776
  gr.Markdown("Ask me anything about studying, your courses, grades, or learning strategies.")
1777
 
1778
- # Create a wrapper function that properly awaits the async function
1779
  async def chat_wrapper(message: str, history: List[List[str]]):
1780
  response = await teaching_assistant.generate_response(
1781
  message,
@@ -1787,11 +974,10 @@ def create_interface():
1787
  chatbot = gr.ChatInterface(
1788
  fn=chat_wrapper,
1789
  examples=[
1790
- "How should I study for my next math test?",
1791
- "What's my current GPA?",
1792
- "Show me my course history",
1793
- "How can I improve my grades in science?",
1794
- "What study methods match my learning style?"
1795
  ],
1796
  title=""
1797
  )
@@ -1800,11 +986,9 @@ def create_interface():
1800
  def navigate_to_tab(tab_index: int, tab_completed_status):
1801
  current_tab = tabs.selected
1802
 
1803
- # Allow backward navigation
1804
  if tab_index <= current_tab:
1805
  return gr.Tabs(selected=tab_index), gr.update(visible=False)
1806
 
1807
- # Check if current tab is completed
1808
  if not tab_completed_status.get(current_tab, False):
1809
  messages = {
1810
  0: "Please complete the transcript analysis first.",
@@ -1822,7 +1006,6 @@ def create_interface():
1822
 
1823
  return gr.Tabs(selected=tab_index), gr.update(visible=False)
1824
 
1825
- # Connect navigation buttons
1826
  step1.click(
1827
  lambda idx, status: navigate_to_tab(idx, status),
1828
  inputs=[gr.State(0), tab_completed],
@@ -1864,7 +1047,6 @@ def create_interface():
1864
 
1865
  return app
1866
 
1867
- # Create and launch the interface
1868
  app = create_interface()
1869
 
1870
  if __name__ == "__main__":
 
41
  filename='transcript_parser.log'
42
  )
43
 
44
+ # Model configuration - Using smaller model
45
+ MODEL_NAME = "deepseek-ai/deepseek-llm-1.3b"
46
 
47
  # Initialize Hugging Face API
48
  if HF_TOKEN:
 
52
  except Exception as e:
53
  logging.error(f"Failed to initialize Hugging Face API: {str(e)}")
54
 
 
 
 
 
 
 
 
 
55
  # ========== MODEL LOADER ==========
56
  class ModelLoader:
57
  def __init__(self):
 
68
  if progress:
69
  progress(0.1, desc="Checking GPU availability...")
70
 
 
71
  torch.cuda.empty_cache()
72
 
73
  if progress:
 
81
  if progress:
82
  progress(0.5, desc="Loading model (this may take a few minutes)...")
83
 
 
84
  model_kwargs = {
85
  "trust_remote_code": True,
86
  "torch_dtype": torch.float16 if self.device == "cuda" else torch.float32,
87
  "device_map": "auto" if self.device == "cuda" else None,
88
  "low_cpu_mem_usage": True,
89
+ "offload_folder": "offload"
90
  }
91
 
92
  try:
 
95
  **model_kwargs
96
  )
97
  except torch.cuda.OutOfMemoryError:
 
98
  model_kwargs["device_map"] = None
99
  model = AutoModelForCausalLM.from_pretrained(
100
  MODEL_NAME,
 
102
  ).to('cpu')
103
  self.device = 'cpu'
104
 
 
105
  test_input = tokenizer("Test", return_tensors="pt").to(self.device)
106
  _ = model.generate(**test_input, max_new_tokens=1)
107
 
 
119
  # Initialize model loader
120
  model_loader = ModelLoader()
121
 
122
@lru_cache(maxsize=1)
def get_model_and_tokenizer():
    """Load the model via ``model_loader`` once and memoize the result.

    Subsequent calls return the cached value, so the expensive load only
    happens on the first invocation.
    """
    loaded = model_loader.load_model()
    return loaded
125
+
126
  # ========== UTILITY FUNCTIONS ==========
127
  def generate_session_token() -> str:
 
128
  alphabet = string.ascii_letters + string.digits
129
  return ''.join(secrets.choice(alphabet) for _ in range(SESSION_TOKEN_LENGTH))
130
 
131
def sanitize_input(text: str) -> str:
    """HTML-escape user text and strip characters outside a small whitelist.

    Empty or falsy input yields an empty string.
    """
    if not text:
        return ""
    cleaned = html.escape(text.strip())
    # NOTE: html.escape has already replaced every '<' with '&lt;', so this
    # tag-stripping pass can no longer match anything; kept for parity with
    # the original behavior.
    cleaned = re.sub(r'<[^>]*>', '', cleaned)
    # Whitelist: word chars, whitespace, and common punctuation/symbols.
    cleaned = re.sub(r'[^\w\s\-.,!?@#\$%^&*()+=]', '', cleaned)
    return cleaned
138
 
139
  def validate_name(name: str) -> str:
 
140
  name = name.strip()
141
  if not name:
142
+ raise ValueError("Name cannot be empty.")
143
  if len(name) > 100:
144
  raise ValueError("Name is too long (maximum 100 characters).")
145
  if any(c.isdigit() for c in name):
 
147
  return name
148
 
149
  def validate_age(age: Union[int, float, str]) -> int:
 
150
  try:
151
  age_int = int(age)
152
  if not MIN_AGE <= age_int <= MAX_AGE:
 
156
  raise ValueError("Please enter a valid age number.")
157
 
158
  def validate_file(file_obj) -> None:
 
159
  if not file_obj:
160
  raise ValueError("Please upload a file first")
161
 
 
163
  if file_ext not in ALLOWED_FILE_TYPES:
164
  raise ValueError(f"Invalid file type. Allowed types: {', '.join(ALLOWED_FILE_TYPES)}")
165
 
166
+ file_size = os.path.getsize(file_obj.name) / (1024 * 1024)
167
  if file_size > MAX_FILE_SIZE_MB:
168
  raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
169
 
170
  # ========== TEXT EXTRACTION FUNCTIONS ==========
171
  def extract_text_from_file(file_path: str, file_ext: str) -> str:
 
172
  text = ""
173
 
174
  try:
175
  if file_ext == '.pdf':
 
176
  try:
177
  doc = fitz.open(file_path)
178
  for page in doc:
179
  text += page.get_text("text") + '\n'
180
  if not text.strip():
181
+ raise ValueError("PyMuPDF returned empty text")
182
  except Exception as e:
183
  logging.warning(f"PyMuPDF failed: {str(e)}. Trying OCR fallback...")
184
  text = extract_text_from_pdf_with_ocr(file_path)
 
186
  elif file_ext in ['.png', '.jpg', '.jpeg']:
187
  text = extract_text_with_ocr(file_path)
188
 
 
189
  text = clean_extracted_text(text)
190
 
191
  if not text.strip():
192
+ raise ValueError("No text could be extracted.")
193
 
194
  return text
195
 
196
  except Exception as e:
197
  logging.error(f"Text extraction error: {str(e)}")
198
+ raise gr.Error(f"Failed to extract text: {str(e)}")
199
 
200
def extract_text_from_pdf_with_ocr(file_path: str) -> str:
    """OCR fallback for PDFs whose embedded text layer is empty.

    Renders each page to an image, binarizes it (grayscale + threshold at
    128) to help Tesseract, and concatenates the per-page OCR output.
    Raises ValueError if any step fails.
    """
    extracted = ""
    try:
        document = fitz.open(file_path)
        for page in document:
            pix = page.get_pixmap()
            image = Image.open(io.BytesIO(pix.tobytes()))
            image = image.convert('L')  # grayscale
            image = image.point(lambda px: 0 if px < 128 else 255)  # hard threshold
            extracted += pytesseract.image_to_string(image, config='--psm 6 --oem 3') + '\n'
    except Exception as exc:
        raise ValueError(f"PDF OCR failed: {str(exc)}")
    return extracted
213
 
214
def extract_text_with_ocr(file_path: str) -> str:
    """Run Tesseract OCR on an image file and return the recognized text.

    The image is binarized (grayscale, then 1-bit threshold at 128) before
    OCR to improve accuracy on scanned documents. Raises ValueError on any
    failure.
    """
    try:
        img = Image.open(file_path)
        img = img.convert('L')  # grayscale
        img = img.point(lambda px: 0 if px < 128 else 255, '1')  # 1-bit threshold
        return pytesseract.image_to_string(img, config=r'--oem 3 --psm 6')
    except Exception as exc:
        raise ValueError(f"OCR processing failed: {str(exc)}")
224
 
225
  def clean_extracted_text(text: str) -> str:
 
 
226
  text = re.sub(r'\s+', ' ', text).strip()
 
 
227
  replacements = {
228
  '|': 'I',
229
  '‘': "'",
 
233
  'fi': 'fi',
234
  'fl': 'fl'
235
  }
 
236
  for wrong, right in replacements.items():
237
  text = text.replace(wrong, right)
 
238
  return text
239
 
240
def remove_sensitive_info(text: str) -> str:
    """Redact SSNs, long numeric IDs, and email addresses from extracted text.

    Args:
        text: Raw text extracted from a transcript.

    Returns:
        The text with matches replaced by [REDACTED], [ID], or [EMAIL].
    """
    # US social security numbers (###-##-####).
    text = re.sub(r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED]', text)
    # Bare 6-9 digit runs (student ID numbers, etc.).
    text = re.sub(r'\b\d{6,9}\b', '[ID]', text)
    # Email addresses. Fixes two character-class bugs in the original pattern:
    # the domain class was '[A-Za-z9.-]' (missing '0-', so it rejected every
    # digit except 9), and the TLD class '[A-Z|a-z]' included a literal '|'.
    text = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '[EMAIL]', text)
    return text
245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  # ========== TRANSCRIPT PARSING ==========
247
  class TranscriptParser:
248
  def __init__(self):
 
253
  self.graduation_status = {}
254
 
255
  def parse_transcript(self, text: str) -> Dict:
256
+ """Simplified transcript parser that extracts key information"""
257
  try:
258
+ parsed_data = {
259
+ 'student_info': {},
260
+ 'course_history': []
261
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
+ # Extract student information
264
+ name_match = re.search(r'(?:Name|Student)[:\s]+([A-Za-z,\s]+)', text, re.IGNORECASE)
265
+ if name_match:
266
+ parsed_data['student_info']['name'] = name_match.group(1).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
268
+ id_match = re.search(r'(?:ID|Student\s*ID)[:\s]+([A-Za-z0-9-]+)', text, re.IGNORECASE)
269
+ if id_match:
270
+ parsed_data['student_info']['id'] = id_match.group(1).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
+ gpa_match = re.search(r'(?:GPA|Grade\s*Point\s*Average)[:\s]+([0-9.]+)', text, re.IGNORECASE)
273
+ if gpa_match:
274
+ parsed_data['student_info']['gpa'] = float(gpa_match.group(1))
 
 
275
 
276
+ # Extract courses (simplified pattern)
277
+ course_pattern = r'([A-Z]{2,4}\s?\d{3})\s+(.*?)\s+([A-F][+-]?)\s+([0-9.]+)'
278
+ courses = re.findall(course_pattern, text)
279
+ for course in courses:
280
+ parsed_data['course_history'].append({
281
+ 'course_code': course[0],
282
+ 'description': course[1],
283
+ 'grade': course[2],
284
+ 'credits': float(course[3])
285
+ })
286
 
287
+ return parsed_data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
 
289
+ except Exception as e:
290
+ logging.error(f"Error parsing transcript: {str(e)}")
291
+ raise ValueError(f"Couldn't parse transcript: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
292
 
293
def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
    """Extract, parse, and summarize an uploaded transcript file.

    Returns (confirmation_message, parsed_data) on success, or
    (error_message, None) on any failure — errors are reported in the
    return value rather than raised.

    NOTE(review): the upload_btn.click wiring passes [file_input, tab_completed]
    as inputs; confirm tab_completed is not being bound to `progress`.
    """
    try:
        if not file_obj:
            raise ValueError("Please upload a file first")

        validate_file(file_obj)
        extension = os.path.splitext(file_obj.name)[1].lower()

        if progress:
            progress(0.2, desc="Extracting text from file...")

        raw_text = extract_text_from_file(file_obj.name, extension)
        if not raw_text.strip():
            raise ValueError("No text could be extracted from the file.")

        if progress:
            progress(0.5, desc="Parsing transcript...")

        parsed = TranscriptParser().parse_transcript(raw_text)

        # Short confirmation; surface the GPA when the parser found one.
        message = "Transcript processed successfully."
        if 'gpa' in parsed.get('student_info', {}):
            message += f"\nGPA detected: {parsed['student_info']['gpa']}"

        return message, parsed

    except Exception as e:
        failure = f"Error processing transcript: {str(e)}"
        logging.error(failure)
        return failure, None
327
 
 
434
  }
435
 
436
  def evaluate_quiz(self, *answers) -> str:
437
+ """Evaluate quiz answers and return learning style results"""
438
+ answers = list(answers)
439
  if len(answers) != len(self.questions):
440
  raise gr.Error("Please answer all questions before submitting")
441
 
 
443
 
444
  for i, answer in enumerate(answers):
445
  if not answer:
446
+ continue
447
 
448
  for j, style in enumerate(self.learning_styles):
449
  if answer == self.options[i][j]:
 
457
  percentages = {style: (score/total_answered)*100 for style, score in scores.items()}
458
  sorted_styles = sorted(scores.items(), key=lambda x: x[1], reverse=True)
459
 
 
460
  result = "## Your Learning Style Results\n\n"
461
  result += "### Scores:\n"
462
  for style, score in sorted_styles:
 
482
  for career in style_info['careers'][:6]:
483
  result += f"- {career}\n"
484
 
 
485
  complementary = [s for s in sorted_styles if s[0] != primary_style][0][0]
486
  result += f"\nYou might also benefit from some **{complementary}** strategies:\n"
487
  for tip in self.learning_styles[complementary]['tips'][:3]:
 
504
 
505
  return result
506
 
 
507
  learning_style_quiz = LearningStyleQuiz()
508
 
509
  # ========== PROFILE MANAGEMENT ==========
 
514
  self.current_session = None
515
 
516
  def set_session(self, session_token: str) -> None:
 
517
  self.current_session = session_token
518
 
519
  def get_profile_path(self, name: str) -> Path:
 
520
  if self.current_session:
 
521
  name_hash = hashlib.sha256(name.encode()).hexdigest()[:16]
522
  return self.profiles_dir / f"{name_hash}_{self.current_session}_profile.json"
523
  return self.profiles_dir / f"{name.replace(' ', '_')}_profile.json"
 
527
  movie: str, movie_reason: str, show: str, show_reason: str,
528
  book: str, book_reason: str, character: str, character_reason: str,
529
  blog: str) -> str:
 
530
  try:
531
+ name = validate_name(name)
532
+ age = validate_age(age)
 
 
 
 
 
 
 
 
 
 
 
 
533
 
534
  if not interests.strip():
535
  raise ValueError("Please describe at least one interest or hobby.")
 
537
  if not transcript:
538
  raise ValueError("Please complete the transcript analysis first.")
539
 
 
540
  if not learning_style or "Your primary learning style is:" not in learning_style:
541
  raise ValueError("Please complete the learning style quiz first.")
542
 
 
543
  favorites = {
544
  "movie": sanitize_input(movie),
545
  "movie_reason": sanitize_input(movie_reason),
 
551
  "character_reason": sanitize_input(character_reason)
552
  }
553
 
 
554
  data = {
555
  "name": name,
556
+ "age": age,
557
  "interests": sanitize_input(interests),
558
+ "transcript": transcript,
559
+ "learning_style": learning_style,
560
  "favorites": favorites,
561
  "blog": sanitize_input(blog) if blog else "",
562
  "session_token": self.current_session,
563
  "last_updated": time.time()
564
  }
565
 
 
566
  filepath = self.get_profile_path(name)
567
 
568
  with open(filepath, "w", encoding='utf-8') as f:
569
  json.dump(data, f, indent=2, ensure_ascii=False)
570
 
 
571
  if HF_TOKEN and 'hf_api' in globals():
572
  try:
573
  hf_api.upload_file(
 
579
  except Exception as e:
580
  logging.error(f"Failed to upload to HF Hub: {str(e)}")
581
 
582
+ # Return simple confirmation with GPA if available
583
+ confirmation = f"Profile saved successfully for {name}."
584
+ if 'gpa' in data.get('transcript', {}).get('student_info', {}):
585
+ confirmation += f"\nGPA: {data['transcript']['student_info']['gpa']}"
586
+ return confirmation
587
 
588
  except Exception as e:
589
  logging.error(f"Profile validation error: {str(e)}")
590
  raise gr.Error(f"Couldn't save profile: {str(e)}")
591
+
592
  def load_profile(self, name: str = None, session_token: str = None) -> Dict:
 
593
  try:
594
  if session_token:
595
  profile_pattern = f"*{session_token}_profile.json"
 
601
  return {}
602
 
603
  if name:
 
604
  name_hash = hashlib.sha256(name.encode()).hexdigest()[:16]
605
  if session_token:
606
  profile_file = self.profiles_dir / f"{name_hash}_{session_token}_profile.json"
 
608
  profile_file = self.profiles_dir / f"{name_hash}_profile.json"
609
 
610
  if not profile_file.exists():
 
611
  if HF_TOKEN and 'hf_api' in globals():
612
  try:
613
  hf_api.download_file(
 
621
  else:
622
  raise gr.Error(f"No profile found for {name}")
623
  else:
 
624
  profile_file = profiles[0]
625
 
626
  with open(profile_file, "r", encoding='utf-8') as f:
627
  profile_data = json.load(f)
 
628
  if time.time() - profile_data.get('last_updated', 0) > SESSION_TIMEOUT:
629
  raise gr.Error("Session expired. Please start a new session.")
630
  return profile_data
 
634
  return {}
635
 
636
  def list_profiles(self, session_token: str = None) -> List[str]:
 
637
  if session_token:
638
  profiles = list(self.profiles_dir.glob(f"*{session_token}_profile.json"))
639
  else:
640
  profiles = list(self.profiles_dir.glob("*.json"))
641
 
 
642
  profile_names = []
643
  for p in profiles:
644
  with open(p, "r", encoding='utf-8') as f:
 
649
  continue
650
 
651
  return profile_names
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
652
 
 
653
  profile_manager = ProfileManager()
654
 
655
  # ========== AI TEACHING ASSISTANT ==========
656
class TeachingAssistant:
    """Lightweight chat assistant grounded in the saved student profile.

    Currently rule-based: answers GPA questions from the stored transcript
    and falls back to a canned prompt for everything else.
    """

    def __init__(self):
        # Cap on how many prior exchanges are retained as context.
        self.max_context_length = 5
        # Rolling list of {"role": ..., "content": ...} chat entries.
        self.context_history = []

    async def generate_response(self, message: str, history: List[List[Union[str, None]]], session_token: str) -> str:
        """Answer one chat message for the profile tied to session_token."""
        try:
            profile = profile_manager.load_profile(session_token=session_token)
            if not profile:
                return "Please complete and save your profile first."

            self._update_context(message, history)

            # GPA questions get a direct answer from the stored transcript.
            if "gpa" in message.lower():
                gpa = profile.get("transcript", {}).get("student_info", {}).get("gpa", "unknown")
                return f"Your GPA is {gpa}. Would you like advice on improving it?"

            return "I'm your learning assistant. Ask me about your GPA, courses, or study tips."

        except Exception as e:
            logging.error(f"Error generating response: {str(e)}")
            return "I encountered an error. Please try again."

    def _update_context(self, message: str, history: List[List[Union[str, None]]]) -> None:
        """Record the new message plus recent history, trimming to the cap."""
        self.context_history.append({"role": "user", "content": message})
        if history:
            for turn in history[-self.max_context_length:]:
                if turn[0]:
                    self.context_history.append({"role": "user", "content": turn[0]})
                if turn[1]:
                    self.context_history.append({"role": "assistant", "content": turn[1]})

        # Keep only the most recent entries (user+assistant pairs).
        self.context_history = self.context_history[-(self.max_context_length * 2):]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
691
 
 
692
  teaching_assistant = TeachingAssistant()
693
 
694
  # ========== GRADIO INTERFACE ==========
695
  def create_interface():
696
  with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
 
697
  session_token = gr.State(value=generate_session_token())
698
  profile_manager.set_session(session_token.value)
699
 
 
700
  tab_completed = gr.State({
701
  0: False, # Transcript Upload
702
  1: False, # Learning Style Quiz
 
705
  4: False # AI Assistant
706
  })
707
 
708
+ # Custom CSS
709
  app.css = """
710
  .gradio-container { max-width: 1200px !important; margin: 0 auto !important; }
711
  .tab-content { padding: 20px !important; border: 1px solid #e0e0e0 !important; border-radius: 8px !important; margin-top: 10px !important; }
 
718
  .quiz-results { margin-top: 20px; padding: 20px; background: #e8f5e9; border-radius: 8px; }
719
  .error-message { color: #d32f2f; background-color: #ffebee; padding: 10px; border-radius: 4px; margin: 10px 0; }
720
 
 
721
  .dark .tab-content { background-color: #2d2d2d !important; border-color: #444 !important; }
722
  .dark .quiz-question { background-color: #3d3d3d !important; }
723
  .dark .quiz-results { background-color: #2e3d2e !important; }
 
727
  .dark .chatbot .user, .dark .chatbot .assistant { color: #eee !important; }
728
  """
729
 
730
+ # Header
731
  with gr.Row():
732
  with gr.Column(scale=4):
733
  gr.Markdown("""
 
753
 
754
  nav_message = gr.HTML(visible=False)
755
 
756
+ # Main tabs
757
  with gr.Tabs(visible=True) as tabs:
758
  # ===== TAB 1: TRANSCRIPT UPLOAD =====
759
  with gr.Tab("Transcript", id=0):
 
772
  with gr.Column(scale=2):
773
  transcript_output = gr.Textbox(
774
  label="Analysis Results",
775
+ lines=5,
776
  interactive=False
777
  )
778
  transcript_data = gr.State()
779
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
780
  upload_btn.click(
781
+ fn=parse_transcript,
782
  inputs=[file_input, tab_completed],
783
+ outputs=[transcript_output, transcript_data]
784
+ ).then(
785
+ fn=lambda: {0: True},
786
+ inputs=None,
787
+ outputs=tab_completed
788
+ ).then(
789
+ fn=lambda: gr.update(elem_classes="completed-tab"),
790
+ outputs=step1
791
+ ).then(
792
+ fn=lambda: gr.update(interactive=True),
793
+ outputs=step2
794
  )
795
 
796
  # ===== TAB 2: LEARNING STYLE QUIZ =====
 
821
  elem_classes="quiz-results"
822
  )
823
 
 
824
  for component in quiz_components:
825
  component.change(
826
  fn=lambda *answers: {
 
832
  outputs=progress
833
  )
834
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
835
  quiz_submit.click(
836
+ fn=lambda *answers: learning_style_quiz.evaluate_quiz(*answers),
837
+ inputs=quiz_components,
838
+ outputs=learning_output
839
+ ).then(
840
+ fn=lambda: gr.update(visible=True),
841
+ outputs=learning_output
842
+ ).then(
843
+ fn=lambda: {1: True},
844
+ inputs=None,
845
+ outputs=tab_completed
846
+ ).then(
847
+ fn=lambda: gr.update(elem_classes="completed-tab"),
848
+ outputs=step2
849
+ ).then(
850
+ fn=lambda: gr.update(interactive=True),
851
+ outputs=step3
852
  )
853
 
854
  quiz_clear.click(
 
887
  character = gr.Textbox(label="Favorite Character (from any story)")
888
  character_reason = gr.Textbox(label="Why do you like them?", lines=2)
889
 
 
890
  with gr.Accordion("Personal Blog (Optional)", open=False):
891
  blog = gr.Textbox(
892
  label="Share your thoughts",
893
+ placeholder="Write something about yourself...",
894
  lines=5
895
  )
896
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
897
  save_personal_btn.click(
898
+ fn=lambda n, a, i, ts: (
899
+ {2: True},
900
+ gr.update(elem_classes="completed-tab"),
901
+ gr.update(interactive=True),
902
+ gr.update(value="<div class='alert-box'>Information saved!</div>", visible=True)
903
+ ),
904
  inputs=[name, age, interests, tab_completed],
905
+ outputs=[tab_completed, step3, step4, save_confirmation]
906
  )
907
 
908
  # ===== TAB 4: SAVE & REVIEW =====
 
929
  label="Profile Summary"
930
  )
931
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
932
  save_btn.click(
933
+ fn=profile_manager.save_profile,
934
  inputs=[
935
  name, age, interests, transcript_data, learning_output,
936
  movie, movie_reason, show, show_reason,
937
+ book, book_reason, character, character_reason, blog
 
938
  ],
939
+ outputs=output_summary
940
  ).then(
941
+ fn=lambda: {3: True},
942
+ inputs=None,
943
+ outputs=tab_completed
944
  ).then(
945
+ fn=lambda: gr.update(elem_classes="completed-tab"),
946
+ outputs=step4
947
  ).then(
948
+ fn=lambda: gr.update(interactive=True),
949
+ outputs=step5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
950
  ).then(
951
  fn=lambda: profile_manager.list_profiles(session_token.value),
952
  outputs=load_profile_dropdown
 
957
  fn=lambda: gr.update(visible=bool(profile_manager.list_profiles(session_token.value))),
958
  outputs=delete_btn
959
  )
 
 
 
 
 
 
 
 
 
 
960
 
961
  # ===== TAB 5: AI ASSISTANT =====
962
  with gr.Tab("AI Assistant", id=4):
963
  gr.Markdown("## Your Personalized Learning Assistant")
964
  gr.Markdown("Ask me anything about studying, your courses, grades, or learning strategies.")
965
 
 
966
  async def chat_wrapper(message: str, history: List[List[str]]):
967
  response = await teaching_assistant.generate_response(
968
  message,
 
974
  chatbot = gr.ChatInterface(
975
  fn=chat_wrapper,
976
  examples=[
977
+ "What's my GPA?",
978
+ "How should I study for math?",
979
+ "What courses am I taking?",
980
+ "Study tips for my learning style"
 
981
  ],
982
  title=""
983
  )
 
986
  def navigate_to_tab(tab_index: int, tab_completed_status):
987
  current_tab = tabs.selected
988
 
 
989
  if tab_index <= current_tab:
990
  return gr.Tabs(selected=tab_index), gr.update(visible=False)
991
 
 
992
  if not tab_completed_status.get(current_tab, False):
993
  messages = {
994
  0: "Please complete the transcript analysis first.",
 
1006
 
1007
  return gr.Tabs(selected=tab_index), gr.update(visible=False)
1008
 
 
1009
  step1.click(
1010
  lambda idx, status: navigate_to_tab(idx, status),
1011
  inputs=[gr.State(0), tab_completed],
 
1047
 
1048
  return app
1049
 
 
1050
  app = create_interface()
1051
 
1052
  if __name__ == "__main__":