Dannyar608 committed on
Commit
5261899
·
verified ·
1 Parent(s): 9c9be5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -953
app.py CHANGED
@@ -38,17 +38,17 @@ import matplotlib.pyplot as plt
38
  # Enhanced Configuration
39
  PROFILES_DIR = "student_profiles"
40
  ALLOWED_FILE_TYPES = [".pdf", ".png", ".jpg", ".jpeg"]
41
- MAX_FILE_SIZE_MB = 10 # Increased from 5MB
42
  MIN_AGE = 5
43
  MAX_AGE = 120
44
  SESSION_TOKEN_LENGTH = 32
45
  HF_TOKEN = os.getenv("HF_TOKEN")
46
  ENCRYPTION_KEY = os.getenv("ENCRYPTION_KEY", Fernet.generate_key().decode())
47
- SESSION_TIMEOUT = 3600 * 3 # 3 hour session timeout
48
  MAX_CONTEXT_HISTORY = 10
49
  MAX_PROFILE_LOAD_ATTEMPTS = 3
50
 
51
- # Initialize logging with enhanced configuration
52
  logging.basicConfig(
53
  level=logging.INFO,
54
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
@@ -59,10 +59,10 @@ logging.basicConfig(
59
  )
60
  logger = logging.getLogger(__name__)
61
 
62
- # Model configuration - Using more capable model
63
- MODEL_NAME = "deepseek-ai/deepseek-llm-7b" # Upgraded from 1.3b to 7b
64
 
65
- # Initialize Hugging Face API with retry logic
66
  if HF_TOKEN:
67
  hf_api = None
68
  for attempt in range(3):
@@ -73,7 +73,7 @@ if HF_TOKEN:
73
  break
74
  except Exception as e:
75
  logger.error(f"Attempt {attempt + 1} failed to initialize Hugging Face API: {str(e)}")
76
- time.sleep(2 ** attempt) # Exponential backoff
77
 
78
  # ========== LEARNING STYLE QUIZ ==========
79
  class LearningStyleQuiz:
@@ -119,7 +119,6 @@ class LearningStyleQuiz:
119
  'kinesthetic': 0
120
  }
121
 
122
- # Map each answer to a learning style
123
  for answer in answers:
124
  if answer.startswith("See") or answer.startswith("Draw") or answer.startswith("Watch") or "diagram" in answer.lower():
125
  style_counts['visual'] += 1
@@ -133,7 +132,6 @@ class LearningStyleQuiz:
133
  primary_style = max(style_counts, key=style_counts.get)
134
  secondary_styles = sorted(style_counts.items(), key=lambda x: x[1], reverse=True)[1:3]
135
 
136
- # Generate results
137
  result = [
138
  "## 🎯 Your Learning Style Results",
139
  f"Your primary learning style is **{primary_style.capitalize()}**",
@@ -183,7 +181,7 @@ class LearningStyleQuiz:
183
  # Initialize learning style quiz
184
  learning_style_quiz = LearningStyleQuiz()
185
 
186
- # ========== ENHANCED MODEL LOADER ==========
187
  class ModelLoader:
188
  def __init__(self):
189
  self.model = None
@@ -196,7 +194,6 @@ class ModelLoader:
196
  self.max_retries = 3
197
 
198
  def load_model(self, progress: gr.Progress = None) -> Tuple[Optional[AutoModelForCausalLM], Optional[AutoTokenizer]]:
199
- """Enhanced lazy load the model with progress feedback and retry logic"""
200
  if self.loaded:
201
  return self.model, self.tokenizer
202
 
@@ -212,7 +209,6 @@ class ModelLoader:
212
  if progress:
213
  progress(0.1, desc="Initializing model environment...")
214
 
215
- # Clear GPU cache more aggressively
216
  if self.device == "cuda":
217
  torch.cuda.empty_cache()
218
  torch.cuda.reset_peak_memory_stats()
@@ -220,7 +216,6 @@ class ModelLoader:
220
  if progress:
221
  progress(0.2, desc="Loading tokenizer...")
222
 
223
- # Tokenizer with more error handling
224
  tokenizer = None
225
  for attempt in range(3):
226
  try:
@@ -239,7 +234,6 @@ class ModelLoader:
239
  if progress:
240
  progress(0.5, desc="Loading model (this may take a few minutes)...")
241
 
242
- # Model configuration with fallbacks
243
  model_kwargs = {
244
  "trust_remote_code": True,
245
  "torch_dtype": torch.float16 if self.device == "cuda" else torch.float32,
@@ -248,7 +242,6 @@ class ModelLoader:
248
  "offload_folder": "offload"
249
  }
250
 
251
- # Add max_memory configuration if multiple GPUs available
252
  if torch.cuda.device_count() > 1:
253
  model_kwargs["max_memory"] = {i: "20GiB" for i in range(torch.cuda.device_count())}
254
 
@@ -275,7 +268,6 @@ class ModelLoader:
275
  logger.warning(f"Model loading attempt {attempt + 1} failed: {str(e)}")
276
  time.sleep(2 ** attempt)
277
 
278
- # Test inference
279
  if progress:
280
  progress(0.8, desc="Verifying model...")
281
  test_input = tokenizer("Test", return_tensors="pt").to(self.device)
@@ -307,580 +299,89 @@ model_loader = ModelLoader()
307
  def get_model_and_tokenizer():
308
  return model_loader.load_model()
309
 
310
- # ========== ENHANCED UTILITY FUNCTIONS ==========
311
- class DataEncryptor:
312
- def __init__(self, key: str):
313
- self.cipher = Fernet(key.encode())
314
-
315
- def encrypt(self, data: str) -> str:
316
- return self.cipher.encrypt(data.encode()).decode()
317
-
318
- def decrypt(self, encrypted_data: str) -> str:
319
- return self.cipher.decrypt(encrypted_data.encode()).decode()
320
-
321
- encryptor = DataEncryptor(ENCRYPTION_KEY)
322
-
323
- def generate_session_token() -> str:
324
- alphabet = string.ascii_letters + string.digits
325
- return ''.join(secrets.choice(alphabet) for _ in range(SESSION_TOKEN_LENGTH))
326
-
327
- def sanitize_input(text: str) -> str:
328
- if not text:
329
- return ""
330
- text = html.escape(text.strip())
331
- text = re.sub(r'<[^>]*>', '', text)
332
- text = re.sub(r'[^\w\s\-.,!?@#\$%^&*()+=]', '', text)
333
- return text
334
-
335
- def validate_name(name: str) -> str:
336
- name = name.strip()
337
- if not name:
338
- raise ValueError("Name cannot be empty.")
339
- if len(name) > 100:
340
- raise ValueError("Name is too long (maximum 100 characters).")
341
- if any(c.isdigit() for c in name):
342
- raise ValueError("Name cannot contain numbers.")
343
- return name
344
-
345
- def validate_age(age: Union[int, float, str]) -> int:
346
- try:
347
- age_int = int(age)
348
- if not MIN_AGE <= age_int <= MAX_AGE:
349
- raise ValueError(f"Age must be between {MIN_AGE} and {MAX_AGE}.")
350
- return age_int
351
- except (ValueError, TypeError):
352
- raise ValueError("Please enter a valid age number.")
353
-
354
- def validate_file(file_obj) -> None:
355
- if not file_obj:
356
- raise ValueError("Please upload a file first")
357
-
358
- file_ext = os.path.splitext(file_obj.name)[1].lower()
359
- if file_ext not in ALLOWED_FILE_TYPES:
360
- raise ValueError(f"Invalid file type. Allowed types: {', '.join(ALLOWED_FILE_TYPES)}")
361
-
362
- file_size = os.path.getsize(file_obj.name) / (1024 * 1024)
363
- if file_size > MAX_FILE_SIZE_MB:
364
- raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
365
-
366
- def remove_sensitive_info(text: str) -> str:
367
- """Enhanced PII removal with more patterns"""
368
- patterns = [
369
- (r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED-SSN]'),
370
- (r'\b\d{6,9}\b', '[ID]'),
371
- (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', '[EMAIL]'),
372
- (r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', '[IP]'),
373
- (r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', '[NAME]'), # Simple name pattern
374
- (r'\b\d{3}\) \d{3}-\d{4}\b', '[PHONE]'),
375
- (r'\b\d{1,5} [A-Z][a-z]+ [A-Z][a-z]+, [A-Z]{2} \d{5}\b', '[ADDRESS]')
376
- ]
377
-
378
- for pattern, replacement in patterns:
379
- text = re.sub(pattern, replacement, text)
380
- return text
381
-
382
- # ========== ENHANCED PDF PARSING ==========
383
- class EnhancedTranscriptParser:
384
  def __init__(self):
385
- self.common_school_patterns = {
386
- 'miami_dade': r'(MIAMI-DADE|DADE COUNTY|MDCPS)',
387
- 'broward': r'(BROWARD COUNTY|BCPS)',
388
- 'florida': r'(FLORIDA|FDOE|FL DOE)'
389
- }
390
- self.transcript_templates = {
391
- 'miami_dade': self._parse_miami_dade_transcript,
392
- 'broward': self._parse_broward_transcript,
393
- 'florida': self._parse_florida_standard_transcript,
394
- 'default': self._parse_generic_transcript
395
- }
396
-
397
- def detect_transcript_type(self, text: str) -> str:
398
- """Detect the transcript format based on patterns"""
399
- text = text.upper()
400
- for template, pattern in self.common_school_patterns.items():
401
- if re.search(pattern, text):
402
- return template
403
- return 'default'
404
-
405
- def parse_transcript(self, file_path: str, file_ext: str) -> Dict:
406
- """Enhanced parsing with format detection and fallbacks"""
407
- try:
408
- # First extract text with appropriate method
409
- text = self.extract_text_from_file(file_path, file_ext)
410
- if not text.strip():
411
- raise ValueError("No text could be extracted from file")
412
-
413
- # Detect transcript type
414
- transcript_type = self.detect_transcript_type(text)
415
- logger.info(f"Detected transcript type: {transcript_type}")
416
-
417
- # Try specialized parser first
418
- parser_func = self.transcript_templates.get(transcript_type, self._parse_generic_transcript)
419
- parsed_data = parser_func(text)
420
-
421
- if not parsed_data:
422
- logger.warning(f"Specialized parser failed, trying generic parser")
423
- parsed_data = self._parse_generic_transcript(text)
424
-
425
- if not parsed_data:
426
- raise ValueError("No data could be parsed from transcript")
427
-
428
- # Validate and enhance parsed data
429
- self.validate_parsed_data(parsed_data)
430
- self.enhance_parsed_data(parsed_data)
431
-
432
- return parsed_data
433
-
434
- except Exception as e:
435
- logger.error(f"Error parsing transcript: {str(e)}")
436
- raise ValueError(f"Couldn't parse transcript content. Error: {str(e)}")
437
-
438
- def extract_text_from_file(self, file_path: str, file_ext: str) -> str:
439
- """Enhanced text extraction with multiple fallbacks"""
440
- text = ""
441
-
442
- try:
443
- if file_ext == '.pdf':
444
- # Try pdfplumber first for better table handling
445
- try:
446
- with pdfplumber.open(file_path) as pdf:
447
- for page in pdf.pages:
448
- # Try to extract tables first
449
- tables = page.extract_tables({
450
- "vertical_strategy": "text",
451
- "horizontal_strategy": "text",
452
- "intersection_y_tolerance": 10,
453
- "join_tolerance": 20
454
- })
455
-
456
- if tables:
457
- for table in tables:
458
- for row in table:
459
- text += " | ".join(str(cell).strip() for cell in row if cell) + "\n"
460
-
461
- # Fall back to text extraction if tables are empty
462
- page_text = page.extract_text()
463
- if page_text:
464
- text += page_text + "\n"
465
-
466
- if not text.strip():
467
- raise ValueError("PDFPlumber returned empty text")
468
-
469
- except Exception as e:
470
- logger.warning(f"PDFPlumber failed: {str(e)}. Trying PyMuPDF...")
471
- doc = fitz.open(file_path)
472
- for page in doc:
473
- text += page.get_text("text", flags=fitz.TEXT_PRESERVE_IMAGES) + '\n'
474
-
475
- elif file_ext in ['.png', '.jpg', '.jpeg']:
476
- text = self.extract_text_with_enhanced_ocr(file_path)
477
-
478
- text = self.clean_extracted_text(text)
479
-
480
- if not text.strip():
481
- raise ValueError("The file appears to be empty or contains no readable text.")
482
-
483
- return text
484
-
485
- except Exception as e:
486
- logger.error(f"Text extraction error: {str(e)}")
487
- raise ValueError(f"Failed to extract text: {str(e)}")
488
-
489
- def extract_text_with_enhanced_ocr(self, file_path: str) -> str:
490
- """Enhanced OCR with preprocessing"""
491
- try:
492
- image = Image.open(file_path)
493
-
494
- # Preprocessing for better OCR
495
- image = image.convert('L') # Grayscale
496
- image = image.point(lambda x: 0 if x < 140 else 255, '1') # Thresholding
497
-
498
- # Custom config for academic documents
499
- custom_config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-.,:()%$@ '
500
-
501
- # Try with different page segmentation modes
502
- for psm in [6, 11, 4]: # Try different modes
503
- text = pytesseract.image_to_string(image, config=f"{custom_config} --psm {psm}")
504
- if len(text.strip()) > 50: # If we got reasonable text
505
- break
506
-
507
- return text
508
- except Exception as e:
509
- raise ValueError(f"OCR processing failed: {str(e)}")
510
-
511
- def clean_extracted_text(self, text: str) -> str:
512
- """Enhanced cleaning for academic transcripts"""
513
- # Normalize whitespace and case
514
- text = re.sub(r'\s+', ' ', text).strip()
515
-
516
- # Fix common OCR errors in academic contexts
517
- replacements = {
518
- 'GradeLv1': 'GradeLvl',
519
- 'CrsNu m': 'CrsNum',
520
- 'YOG': 'Year of Graduation',
521
- 'Comm Serv': 'Community Service',
522
- r'\bA\s*-\s*': 'A-', # Fix requirement codes
523
- r'\bB\s*-\s*': 'B-',
524
- r'\bC\s*-\s*': 'C-',
525
- r'\bD\s*-\s*': 'D-',
526
- r'\bE\s*-\s*': 'E-',
527
- r'\bF\s*-\s*': 'F-',
528
- r'\bG\s*-\s*': 'G-',
529
- r'\bZ\s*-\s*': 'Z-',
530
- 'lnProgress': 'inProgress',
531
- 'lP': 'IP',
532
- 'AP\s': 'AP ',
533
- 'DE\s': 'DE ',
534
- 'Honors\s': 'Honors ',
535
- 'lB': 'IB'
536
- }
537
-
538
- for pattern, replacement in replacements.items():
539
- text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
540
-
541
- # Fix course codes with spaces
542
- text = re.sub(r'(\b[A-Z]{2,4})\s(\d{3}[A-Z]?\b)', r'\1\2', text)
543
-
544
- return text
545
-
546
- def validate_parsed_data(self, parsed_data: Dict) -> bool:
547
- """Enhanced validation with more fields"""
548
- required_fields = [
549
- ('student_info', 'name'),
550
- ('student_info', 'id'),
551
- ('requirements',), # At least some requirements
552
- ('course_history',) # At least some courses
553
- ]
554
-
555
- for path in required_fields:
556
- current = parsed_data
557
- for key in path:
558
- if key not in current:
559
- raise ValueError(f"Missing critical field: {'.'.join(path)}")
560
- current = current[key]
561
- return True
562
-
563
- def enhance_parsed_data(self, parsed_data: Dict) -> Dict:
564
- """Add derived fields and calculations"""
565
- # Calculate total credits if not present
566
- if 'total_credits' not in parsed_data.get('student_info', {}):
567
- try:
568
- total_credits = sum(
569
- float(course.get('credits', 0))
570
- for course in parsed_data.get('course_history', [])
571
- if course and str(course.get('credits', '0')).replace('.', '').isdigit()
572
- )
573
- parsed_data['student_info']['total_credits'] = round(total_credits, 2)
574
- except:
575
- pass
576
 
577
- # Calculate GPA if not present
578
- if 'weighted_gpa' not in parsed_data.get('student_info', {}):
579
- try:
580
- grades = []
581
- grade_points = {
582
- 'A': 4.0, 'A-': 3.7, 'B+': 3.3, 'B': 3.0, 'B-': 2.7,
583
- 'C+': 2.3, 'C': 2.0, 'C-': 1.7, 'D+': 1.3, 'D': 1.0, 'F': 0.0
584
- }
585
-
586
- for course in parsed_data.get('course_history', []):
587
- grade = course.get('grade_earned', '').upper()
588
- if grade in grade_points:
589
- grades.append(grade_points[grade])
590
-
591
- if grades:
592
- unweighted_gpa = sum(grades) / len(grades)
593
- parsed_data['student_info']['unweighted_gpa'] = round(unweighted_gpa, 2)
594
-
595
- # Simple weighted GPA calculation (AP/IB/DE courses get +1)
596
- weighted_grades = []
597
- for course in parsed_data.get('course_history', []):
598
- grade = course.get('grade_earned', '').upper()
599
- if grade in grade_points:
600
- weight = 1.0 if any(x in course.get('course_name', '').upper()
601
- for x in ['AP', 'IB', 'DE', 'HONORS']) else 0.0
602
- weighted_grades.append(grade_points[grade] + weight)
603
-
604
- if weighted_grades:
605
- parsed_data['student_info']['weighted_gpa'] = round(sum(weighted_grades) / len(weighted_grades), 2)
606
- except:
607
- pass
608
 
609
- return parsed_data
610
-
611
- def _parse_miami_dade_transcript(self, text: str) -> Optional[Dict]:
612
- """Enhanced Miami-Dade parser with better table handling"""
613
- try:
614
- parsed_data = {
615
- 'student_info': {},
616
- 'requirements': {},
617
- 'course_history': [],
618
- 'assessments': {}
619
- }
620
-
621
- # Extract student info with more robust pattern
622
- student_info_match = re.search(
623
- r"(\d{7})\s*-\s*(.*?)\s*\n.*?Current Grade:\s*(\d+).*?YOG\s*(\d{4})",
624
- text,
625
- re.DOTALL | re.IGNORECASE
626
- )
627
- if student_info_match:
628
- parsed_data['student_info'] = {
629
- 'id': student_info_match.group(1),
630
- 'name': student_info_match.group(2).strip(),
631
- 'grade': student_info_match.group(3),
632
- 'year_of_graduation': student_info_match.group(4),
633
- 'district': 'Miami-Dade'
634
- }
635
-
636
- # Extract GPA information with more flexible patterns
637
- gpa_patterns = [
638
- r"(?:Un.?weighted|Weighted)\s*GPA\s*([\d.]+)",
639
- r"GPA\s*\(.*?\)\s*:\s*([\d.]+)",
640
- r"Grade\s*Point\s*Average\s*:\s*([\d.]+)"
641
- ]
642
-
643
- gpa_values = []
644
- for pattern in gpa_patterns:
645
- gpa_values.extend(re.findall(pattern, text, re.IGNORECASE))
646
- if len(gpa_values) >= 2:
647
- break
648
-
649
- if len(gpa_values) >= 1:
650
- parsed_data['student_info']['unweighted_gpa'] = float(gpa_values[0])
651
- if len(gpa_values) >= 2:
652
- parsed_data['student_info']['weighted_gpa'] = float(gpa_values[1])
653
-
654
- # Extract community service info
655
- service_hours_match = re.search(r"Comm\s*Serv\s*Hours\s*(\d+)", text, re.IGNORECASE)
656
- if service_hours_match:
657
- parsed_data['student_info']['community_service_hours'] = int(service_hours_match.group(1))
658
-
659
- service_date_match = re.search(r"Comm\s*Serv\s*Date\s*(\d{2}/\d{2}/\d{4})", text, re.IGNORECASE)
660
- if service_date_match:
661
- parsed_data['student_info']['community_service_date'] = service_date_match.group(1)
662
-
663
- # Extract credits info
664
- credits_match = re.search(r"Total\s*Credits\s*Earned\s*([\d.]+)", text, re.IGNORECASE)
665
- if credits_match:
666
- parsed_data['student_info']['total_credits'] = float(credits_match.group(1))
667
-
668
- # Extract virtual grade
669
- virtual_grade_match = re.search(r"Virtual\s*Grade\s*([A-Z])", text, re.IGNORECASE)
670
- if virtual_grade_match:
671
- parsed_data['student_info']['virtual_grade'] = virtual_grade_match.group(1)
672
-
673
- # Enhanced requirements section parsing
674
- req_section = re.search(
675
- r"(?:Graduation\s*Requirements|Requirements\s*Summary).*?(Code\s*Description.*?)(?:\n\s*\n|$)",
676
- text,
677
- re.DOTALL | re.IGNORECASE
678
- )
679
-
680
- if req_section:
681
- req_lines = [line.strip() for line in req_section.group(1).split('\n') if line.strip()]
682
- for line in req_lines:
683
- if '|' in line: # Table format
684
- parts = [part.strip() for part in line.split('|') if part.strip()]
685
- if len(parts) >= 5: # More lenient check for number of columns
686
- try:
687
- code = parts[0] if len(parts) > 0 else ""
688
- description = parts[1] if len(parts) > 1 else ""
689
- required = float(parts[2]) if len(parts) > 2 and parts[2].replace('.','').isdigit() else 0.0
690
- waived = float(parts[3]) if len(parts) > 3 and parts[3].replace('.','').isdigit() else 0.0
691
- completed = float(parts[4]) if len(parts) > 4 and parts[4].replace('.','').isdigit() else 0.0
692
- status = parts[5] if len(parts) > 5 else ""
693
-
694
- # Extract percentage if available
695
- percent = 0.0
696
- if status:
697
- percent_match = re.search(r"(\d+)%", status)
698
- if percent_match:
699
- percent = float(percent_match.group(1))
700
-
701
- parsed_data['requirements'][code] = {
702
- "description": description,
703
- "required": required,
704
- "waived": waived,
705
- "completed": completed,
706
- "percent_complete": percent,
707
- "status": status
708
- }
709
- except (IndexError, ValueError) as e:
710
- logger.warning(f"Skipping malformed requirement line: {line}. Error: {str(e)}")
711
- continue
712
-
713
- # Enhanced course history parsing
714
- course_section = re.search(
715
- r"(?:Course\s*History|Academic\s*Record).*?(Requirement.*?School Year.*?GradeLv1.*?CrsNum.*?Description.*?Term.*?DstNumber.*?FG.*?Incl.*?Credits.*?)(?:\n\s*\n|$)",
716
- text,
717
- re.DOTALL | re.IGNORECASE
718
- )
719
-
720
- if course_section:
721
- course_lines = [
722
- line.strip() for line in course_section.group(1).split('\n')
723
- if line.strip() and '|' in line
724
- ]
725
-
726
- for line in course_lines:
727
- parts = [part.strip() for part in line.split('|') if part.strip()]
728
-
729
- try:
730
- course = {
731
- 'requirement': parts[0] if len(parts) > 0 else "",
732
- 'school_year': parts[1] if len(parts) > 1 else "",
733
- 'grade_level': parts[2] if len(parts) > 2 else "",
734
- 'course_code': parts[3] if len(parts) > 3 else "",
735
- 'description': parts[4] if len(parts) > 4 else "",
736
- 'term': parts[5] if len(parts) > 5 else "",
737
- 'district_number': parts[6] if len(parts) > 6 else "",
738
- 'fg': parts[7] if len(parts) > 7 else "",
739
- 'included': parts[8] if len(parts) > 8 else "",
740
- 'credits': parts[9] if len(parts) > 9 else "0",
741
- 'status': 'Completed' if parts[9] and parts[9] != 'inProgress' else 'In Progress'
742
- }
743
-
744
- # Handle credits conversion
745
- if "inprogress" in course['credits'].lower() or not course['credits']:
746
- course['credits'] = "0"
747
- elif not course['credits'].replace('.','').isdigit():
748
- course['credits'] = "0"
749
-
750
- parsed_data['course_history'].append(course)
751
- except (IndexError, ValueError) as e:
752
- logger.warning(f"Skipping malformed course line: {line}. Error: {str(e)}")
753
- continue
754
-
755
- return parsed_data
756
 
757
- except Exception as e:
758
- logger.warning(f"Miami-Dade transcript parsing failed: {str(e)}")
759
- return None
760
-
761
- def _parse_broward_transcript(self, text: str) -> Optional[Dict]:
762
- """Parser for Broward County transcripts"""
763
- try:
764
  parsed_data = {
765
- 'student_info': {},
766
- 'requirements': {},
767
- 'course_history': [],
768
- 'assessments': {}
769
  }
770
 
771
- # Broward-specific patterns
772
- student_info_match = re.search(
773
- r"Student:\s*(\d+)\s*-\s*(.*?)\s*Grade:\s*(\d+)",
774
- text,
775
- re.IGNORECASE
776
- )
777
- if student_info_match:
778
- parsed_data['student_info'] = {
779
- 'id': student_info_match.group(1),
780
- 'name': student_info_match.group(2).strip(),
781
- 'grade': student_info_match.group(3),
782
- 'district': 'Broward'
783
- }
784
-
785
- # Add Broward-specific parsing logic here...
786
-
787
  return parsed_data
788
- except Exception as e:
789
- logger.warning(f"Broward transcript parsing failed: {str(e)}")
790
- return None
791
-
792
- def _parse_florida_standard_transcript(self, text: str) -> Optional[Dict]:
793
- """Parser for Florida standard transcripts"""
794
- try:
795
- parsed_data = {
796
- 'student_info': {},
797
- 'requirements': {},
798
- 'course_history': [],
799
- 'assessments': {}
800
- }
801
-
802
- # Florida standard patterns
803
- student_info_match = re.search(
804
- r"Florida\s*Student\s*Transcript.*?Name:\s*(.*?)\s*ID:\s*(\d+)",
805
- text,
806
- re.IGNORECASE | re.DOTALL
807
- )
808
- if student_info_match:
809
- parsed_data['student_info'] = {
810
- 'name': student_info_match.group(1).strip(),
811
- 'id': student_info_match.group(2),
812
- 'district': 'Florida'
813
- }
814
-
815
- # Add Florida standard parsing logic here...
816
-
817
- return parsed_data
818
- except Exception as e:
819
- logger.warning(f"Florida standard transcript parsing failed: {str(e)}")
820
- return None
821
-
822
- def _parse_generic_transcript(self, text: str) -> Optional[Dict]:
823
- """Fallback parser for generic transcripts"""
824
- try:
825
- parsed_data = {
826
- 'student_info': {},
827
- 'requirements': {},
828
- 'course_history': [],
829
- 'assessments': {}
830
  }
831
-
832
- # Try to extract basic student info
833
- name_match = re.search(r"(?:Student|Name):\s*(.*?)\s*(?:\n|ID|$)", text, re.IGNORECASE)
834
- if name_match:
835
- parsed_data['student_info']['name'] = name_match.group(1).strip()
836
-
837
- id_match = re.search(r"(?:ID|Student\s*Number):\s*(\d+)", text, re.IGNORECASE)
838
- if id_match:
839
- parsed_data['student_info']['id'] = id_match.group(1)
840
-
841
- # Try to extract courses
842
- course_patterns = [
843
- r"([A-Z]{2,4}\d{3}[A-Z]?)\s+(.*?)\s+([A-F][+-]?)\s+(\d+\.?\d*)", # CODE DESC GRADE CREDITS
844
- r"(\d{4}-\d{4})\s+([A-Z]{2,4}\d{3}[A-Z]?)\s+(.*?)\s+([A-F][+-]?)\s+(\d+\.?\d*)", # YEAR CODE DESC GRADE CREDITS
845
- r"(.*?)\s+([A-F][+-]?)\s+(\d+\.?\d*)" # DESC GRADE CREDITS
846
- ]
847
-
848
- for pattern in course_patterns:
849
- courses = re.findall(pattern, text)
850
- if courses:
851
- for course in courses:
852
- if len(course) == 4:
853
- parsed_data['course_history'].append({
854
- 'course_code': course[0],
855
- 'description': course[1],
856
- 'grade': course[2],
857
- 'credits': course[3]
858
- })
859
- elif len(course) == 5:
860
- parsed_data['course_history'].append({
861
- 'school_year': course[0],
862
- 'course_code': course[1],
863
- 'description': course[2],
864
- 'grade': course[3],
865
- 'credits': course[4]
866
- })
867
- elif len(course) == 3:
868
- parsed_data['course_history'].append({
869
- 'description': course[0],
870
- 'grade': course[1],
871
- 'credits': course[2]
872
- })
873
- break
874
-
875
- return parsed_data if parsed_data['course_history'] else None
876
- except Exception as e:
877
- logger.warning(f"Generic transcript parsing failed: {str(e)}")
878
- return None
879
 
880
- # Initialize enhanced parser
881
- transcript_parser = EnhancedTranscriptParser()
882
 
883
- # ========== ENHANCED ANALYSIS FUNCTIONS ==========
884
  class AcademicAnalyzer:
885
  def __init__(self):
886
  self.gpa_scale = {
@@ -896,7 +397,6 @@ class AcademicAnalyzer:
896
  }
897
 
898
  def analyze_gpa(self, parsed_data: Dict) -> Dict:
899
- """Enhanced GPA analysis with more detailed feedback"""
900
  analysis = {
901
  'rating': '',
902
  'description': '',
@@ -954,7 +454,6 @@ class AcademicAnalyzer:
954
  "Focus on fundamental study skills"
955
  ]
956
 
957
- # Add comparison between weighted and unweighted
958
  if weighted_gpa > 0 and unweighted_gpa > 0:
959
  diff = weighted_gpa - unweighted_gpa
960
  if diff > 0.5:
@@ -974,7 +473,6 @@ class AcademicAnalyzer:
974
  }
975
 
976
  def analyze_graduation_status(self, parsed_data: Dict) -> Dict:
977
- """Enhanced graduation analysis with requirement breakdown"""
978
  analysis = {
979
  'status': '',
980
  'completion_percentage': 0,
@@ -998,7 +496,6 @@ class AcademicAnalyzer:
998
 
999
  analysis['completion_percentage'] = (total_completed / total_required) * 100 if total_required > 0 else 0
1000
 
1001
- # Identify missing requirements
1002
  analysis['missing_requirements'] = [
1003
  {
1004
  'code': code,
@@ -1010,7 +507,6 @@ class AcademicAnalyzer:
1010
  if req and float(req.get('completed', 0)) < float(req.get('required', 0))
1011
  ]
1012
 
1013
- # Determine status message
1014
  current_grade = parsed_data.get('student_info', {}).get('grade', '')
1015
  grad_year = parsed_data.get('student_info', {}).get('year_of_graduation', '')
1016
 
@@ -1030,7 +526,6 @@ class AcademicAnalyzer:
1030
  analysis['status'] = f"❌ You've only completed {analysis['completion_percentage']:.1f}% of requirements. Immediate action needed."
1031
  analysis['on_track'] = False
1032
 
1033
- # Add timeline projection if possible
1034
  if current_grade and grad_year:
1035
  remaining_credits = total_required - total_completed
1036
  years_remaining = int(grad_year) - datetime.datetime.now().year - int(current_grade)
@@ -1053,7 +548,6 @@ class AcademicAnalyzer:
1053
  }
1054
 
1055
  def analyze_course_rigor(self, parsed_data: Dict) -> Dict:
1056
- """Analyze the difficulty level of courses taken"""
1057
  analysis = {
1058
  'advanced_courses': 0,
1059
  'honors_courses': 0,
@@ -1127,7 +621,6 @@ class AcademicAnalyzer:
1127
  }
1128
 
1129
  def generate_college_recommendations(self, parsed_data: Dict) -> Dict:
1130
- """Enhanced college recommendations based on full profile"""
1131
  recommendations = {
1132
  'reach': [],
1133
  'target': [],
@@ -1137,12 +630,10 @@ class AcademicAnalyzer:
1137
  }
1138
 
1139
  try:
1140
- # Get key metrics
1141
  weighted_gpa = float(parsed_data.get('student_info', {}).get('weighted_gpa', 0))
1142
  rigor_analysis = self.analyze_course_rigor(parsed_data)
1143
  service_hours = int(parsed_data.get('student_info', {}).get('community_service_hours', 0))
1144
 
1145
- # Determine college tiers
1146
  if weighted_gpa >= 4.3 and rigor_analysis['advanced_courses'] >= 8 and service_hours >= 100:
1147
  recommendations['reach'].extend([
1148
  "Ivy League: Harvard, Yale, Princeton, Columbia, etc.",
@@ -1190,7 +681,6 @@ class AcademicAnalyzer:
1190
  "Technical Schools"
1191
  ])
1192
 
1193
- # Scholarship recommendations
1194
  if weighted_gpa >= 4.0:
1195
  recommendations['scholarships'].extend([
1196
  "National Merit Scholarship",
@@ -1210,7 +700,6 @@ class AcademicAnalyzer:
1210
  "First-Generation Student Programs"
1211
  ])
1212
 
1213
- # Improvement areas
1214
  if weighted_gpa < 3.5:
1215
  recommendations['improvement_areas'].append("Improve GPA through focused study and tutoring")
1216
  if rigor_analysis['advanced_courses'] < 4:
@@ -1229,7 +718,6 @@ class AcademicAnalyzer:
1229
  }
1230
 
1231
  def generate_study_plan(self, parsed_data: Dict, learning_style: str) -> Dict:
1232
- """Generate personalized study plan based on learning style and courses"""
1233
  plan = {
1234
  'weekly_schedule': {},
1235
  'study_strategies': [],
@@ -1238,19 +726,16 @@ class AcademicAnalyzer:
1238
  }
1239
 
1240
  try:
1241
- # Get current courses
1242
  current_courses = [
1243
  course for course in parsed_data.get('course_history', [])
1244
  if course.get('status', '').lower() == 'in progress'
1245
  ]
1246
 
1247
- # Generate weekly schedule template
1248
  days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
1249
  for day in days:
1250
  plan['weekly_schedule'][day] = []
1251
 
1252
- # Add study blocks based on learning style
1253
- study_blocks = 2 # Default
1254
  if learning_style.lower() == 'visual':
1255
  study_blocks = 3
1256
  plan['study_strategies'].extend([
@@ -1280,9 +765,8 @@ class AcademicAnalyzer:
1280
  "Use hands-on activities when possible"
1281
  ])
1282
 
1283
- # Distribute study blocks
1284
  for i, course in enumerate(current_courses):
1285
- day_index = i % 5 # Monday-Friday
1286
  day = days[day_index]
1287
  plan['weekly_schedule'][day].append({
1288
  'course': course.get('description', 'Course'),
@@ -1294,14 +778,12 @@ class AcademicAnalyzer:
1294
  ]
1295
  })
1296
 
1297
- # Add time management tips
1298
  plan['time_management_tips'].extend([
1299
  "Use the Pomodoro technique (25 min study, 5 min break)",
1300
  "Prioritize assignments by due date and importance",
1301
  "Schedule regular review sessions"
1302
  ])
1303
 
1304
- # Add resource recommendations
1305
  plan['resource_recommendations'].extend([
1306
  "Khan Academy for math and science",
1307
  "Quizlet for flashcards",
@@ -1320,7 +802,7 @@ class AcademicAnalyzer:
1320
  # Initialize academic analyzer
1321
  academic_analyzer = AcademicAnalyzer()
1322
 
1323
- # ========== ENHANCED VISUALIZATION FUNCTIONS ==========
1324
  class DataVisualizer:
1325
  def __init__(self):
1326
  self.color_palette = {
@@ -1335,7 +817,6 @@ class DataVisualizer:
1335
  }
1336
 
1337
  def create_gpa_visualization(self, parsed_data: Dict):
1338
- """Enhanced GPA visualization with more details"""
1339
  try:
1340
  gpa_data = {
1341
  "Type": ["Weighted GPA", "Unweighted GPA"],
@@ -1362,7 +843,6 @@ class DataVisualizer:
1362
  hover_data={"Type": True, "Value": ":.2f"}
1363
  )
1364
 
1365
- # Add reference lines and annotations
1366
  fig.add_hline(y=4.0, line_dash="dot", line_color="green", annotation_text="Excellent", annotation_position="top left")
1367
  fig.add_hline(y=3.0, line_dash="dot", line_color="orange", annotation_text="Good", annotation_position="top left")
1368
  fig.add_hline(y=2.0, line_dash="dot", line_color="red", annotation_text="Minimum", annotation_position="top left")
@@ -1389,7 +869,6 @@ class DataVisualizer:
1389
  return None
1390
 
1391
  def create_requirements_visualization(self, parsed_data: Dict):
1392
- """Enhanced requirements visualization with interactive elements"""
1393
  try:
1394
  req_data = []
1395
  for code, req in parsed_data.get('requirements', {}).items():
@@ -1448,21 +927,20 @@ class DataVisualizer:
1448
  return None
1449
 
1450
  def create_credits_distribution_visualization(self, parsed_data: Dict):
1451
- """Enhanced credits distribution visualization"""
1452
  try:
1453
  core_credits = sum(
1454
  req['completed'] for req in parsed_data.get('requirements', {}).values()
1455
- if req and req.get('code', '').split('-')[0] in ['A', 'B', 'C', 'D'] # English, Math, Science, Social Studies
1456
  )
1457
 
1458
  elective_credits = sum(
1459
  req['completed'] for req in parsed_data.get('requirements', {}).values()
1460
- if req and req.get('code', '').split('-')[0] in ['G', 'H'] # Electives
1461
  )
1462
 
1463
  other_credits = sum(
1464
  req['completed'] for req in parsed_data.get('requirements', {}).values()
1465
- if req and req.get('code', '').split('-')[0] in ['E', 'F'] # Arts, PE
1466
  )
1467
 
1468
  credit_values = [core_credits, elective_credits, other_credits]
@@ -1510,7 +988,6 @@ class DataVisualizer:
1510
  return None
1511
 
1512
  def create_course_rigor_visualization(self, parsed_data: Dict):
1513
- """Visualization of course rigor analysis"""
1514
  try:
1515
  rigor = academic_analyzer.analyze_course_rigor(parsed_data)
1516
 
@@ -1559,7 +1036,7 @@ class DataVisualizer:
1559
  # Initialize visualizer
1560
  data_visualizer = DataVisualizer()
1561
 
1562
- # ========== ENHANCED PROFILE MANAGEMENT ==========
1563
  class EnhancedProfileManager:
1564
  def __init__(self):
1565
  self.profiles_dir = Path(PROFILES_DIR)
@@ -1581,7 +1058,6 @@ class EnhancedProfileManager:
1581
  movie: str, movie_reason: str, show: str, show_reason: str,
1582
  book: str, book_reason: str, character: str, character_reason: str,
1583
  blog: str, study_plan: Dict = None) -> str:
1584
- """Enhanced profile saving with encryption and validation"""
1585
  try:
1586
  name = validate_name(name)
1587
  age = validate_age(age)
@@ -1595,7 +1071,6 @@ class EnhancedProfileManager:
1595
  if not learning_style or "Your primary learning style is" not in learning_style:
1596
  raise ValueError("Please complete the learning style quiz first.")
1597
 
1598
- # Prepare favorites with sanitization
1599
  favorites = {
1600
  "movie": sanitize_input(movie),
1601
  "movie_reason": sanitize_input(movie_reason),
@@ -1607,7 +1082,6 @@ class EnhancedProfileManager:
1607
  "character_reason": sanitize_input(character_reason)
1608
  }
1609
 
1610
- # Generate study plan if not provided
1611
  if not study_plan:
1612
  learning_style_match = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*", learning_style)
1613
  if learning_style_match:
@@ -1615,30 +1089,27 @@ class EnhancedProfileManager:
1615
  transcript,
1616
  learning_style_match.group(1))
1617
 
1618
- # Prepare data with encryption for sensitive fields
1619
  data = {
1620
  "name": self.encryptor.encrypt(name),
1621
  "age": age,
1622
  "interests": self.encryptor.encrypt(sanitize_input(interests)),
1623
- "transcript": transcript, # Already sanitized during parsing
1624
  "learning_style": learning_style,
1625
  "favorites": favorites,
1626
  "blog": self.encryptor.encrypt(sanitize_input(blog)) if blog else "",
1627
  "study_plan": study_plan if study_plan else {},
1628
  "session_token": self.current_session,
1629
  "last_updated": time.time(),
1630
- "version": "2.0" # Profile version for compatibility
1631
  }
1632
 
1633
  filepath = self.get_profile_path(name)
1634
 
1635
- # Save with atomic write
1636
  temp_path = filepath.with_suffix('.tmp')
1637
  with open(temp_path, "w", encoding='utf-8') as f:
1638
  json.dump(data, f, indent=2, ensure_ascii=False)
1639
- temp_path.replace(filepath) # Atomic replace
1640
 
1641
- # Optional cloud backup
1642
  if HF_TOKEN and hf_api:
1643
  try:
1644
  hf_api.upload_file(
@@ -1658,7 +1129,6 @@ class EnhancedProfileManager:
1658
  raise gr.Error(f"Couldn't save profile: {str(e)}")
1659
 
1660
  def load_profile(self, name: str = None, session_token: str = None) -> Dict:
1661
- """Enhanced profile loading with decryption and retries"""
1662
  for attempt in range(MAX_PROFILE_LOAD_ATTEMPTS):
1663
  try:
1664
  if session_token:
@@ -1673,7 +1143,6 @@ class EnhancedProfileManager:
1673
  if name:
1674
  profile_file = self.get_profile_path(name)
1675
  if not profile_file.exists():
1676
- # Try to download from Hugging Face Hub
1677
  if HF_TOKEN and hf_api:
1678
  try:
1679
  hf_api.download_file(
@@ -1688,18 +1157,15 @@ class EnhancedProfileManager:
1688
  else:
1689
  raise gr.Error(f"No profile found for {name}")
1690
  else:
1691
- # Load most recently modified profile
1692
  profiles.sort(key=lambda x: x.stat().st_mtime, reverse=True)
1693
  profile_file = profiles[0]
1694
 
1695
  with open(profile_file, "r", encoding='utf-8') as f:
1696
  profile_data = json.load(f)
1697
 
1698
- # Check session timeout
1699
  if time.time() - profile_data.get('last_updated', 0) > SESSION_TIMEOUT:
1700
  raise gr.Error("Session expired. Please start a new session.")
1701
 
1702
- # Decrypt encrypted fields
1703
  if profile_data.get('version', '1.0') == '2.0':
1704
  try:
1705
  profile_data['name'] = self.encryptor.decrypt(profile_data['name'])
@@ -1723,7 +1189,6 @@ class EnhancedProfileManager:
1723
  time.sleep(0.5 * (attempt + 1))
1724
 
1725
  def list_profiles(self, session_token: str = None) -> List[str]:
1726
- """List available profiles with decrypted names"""
1727
  if session_token:
1728
  profiles = list(self.profiles_dir.glob(f"*{session_token}_profile.json"))
1729
  else:
@@ -1748,22 +1213,18 @@ class EnhancedProfileManager:
1748
  return profile_names
1749
 
1750
  def delete_profile(self, name: str, session_token: str = None) -> bool:
1751
- """Delete a profile with verification"""
1752
  try:
1753
  profile_file = self.get_profile_path(name)
1754
  if not profile_file.exists():
1755
  return False
1756
 
1757
- # Verify the profile belongs to the current session
1758
  with open(profile_file, "r", encoding='utf-8') as f:
1759
  data = json.load(f)
1760
  if session_token and data.get('session_token') != session_token:
1761
  return False
1762
 
1763
- # Delete local file
1764
  profile_file.unlink()
1765
 
1766
- # Try to delete from Hugging Face Hub
1767
  if HF_TOKEN and hf_api:
1768
  try:
1769
  hf_api.delete_file(
@@ -1779,10 +1240,10 @@ class EnhancedProfileManager:
1779
  logger.error(f"Error deleting profile: {str(e)}")
1780
  return False
1781
 
1782
- # Initialize enhanced profile manager
1783
  profile_manager = EnhancedProfileManager()
1784
 
1785
- # ========== ENHANCED AI TEACHING ASSISTANT ==========
1786
  class EnhancedTeachingAssistant:
1787
  def __init__(self):
1788
  self.context_history = []
@@ -1791,14 +1252,12 @@ class EnhancedTeachingAssistant:
1791
  self.last_model_load_attempt = 0
1792
 
1793
  async def initialize_model(self):
1794
- """Lazy initialize the model with retries"""
1795
  if not self.model or not self.tokenizer:
1796
- if time.time() - self.last_model_load_attempt > 3600: # Retry every hour if failed
1797
  self.model, self.tokenizer = get_model_and_tokenizer()
1798
  self.last_model_load_attempt = time.time()
1799
 
1800
  async def generate_response(self, message: str, history: List[List[Union[str, None]]], session_token: str) -> str:
1801
- """Enhanced response generation with context awareness"""
1802
  try:
1803
  await self.initialize_model()
1804
 
@@ -1808,28 +1267,24 @@ class EnhancedTeachingAssistant:
1808
 
1809
  self._update_context(message, history)
1810
 
1811
- # Get relevant profile information
1812
  student_name = profile.get('name', 'Student')
1813
  gpa = profile.get('transcript', {}).get('student_info', {}).get('weighted_gpa', None)
1814
  learning_style = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*",
1815
  profile.get('learning_style', ''))
1816
  learning_style = learning_style.group(1) if learning_style else None
1817
 
1818
- # Prepare context for the model
1819
  context = f"You are an AI teaching assistant helping {student_name}. "
1820
  if gpa:
1821
  context += f"{student_name}'s current weighted GPA is {gpa}. "
1822
  if learning_style:
1823
  context += f"They are a {learning_style.lower()} learner. "
1824
 
1825
- # Add recent conversation history
1826
  if self.context_history:
1827
  context += "Recent conversation:\n"
1828
  for item in self.context_history[-self.max_context_length:]:
1829
  role = "Student" if item['role'] == 'user' else "Assistant"
1830
  context += f"{role}: {item['content']}\n"
1831
 
1832
- # Generate response based on query type
1833
  query_type = self._classify_query(message)
1834
  response = await self._generate_typed_response(query_type, message, context, profile)
1835
 
@@ -1840,7 +1295,6 @@ class EnhancedTeachingAssistant:
1840
  return "I encountered an error processing your request. Please try again."
1841
 
1842
  def _classify_query(self, message: str) -> str:
1843
- """Classify the type of user query"""
1844
  message_lower = message.lower()
1845
 
1846
  if any(word in message_lower for word in ['gpa', 'grade', 'average']):
@@ -1859,7 +1313,6 @@ class EnhancedTeachingAssistant:
1859
  return 'general'
1860
 
1861
  async def _generate_typed_response(self, query_type: str, message: str, context: str, profile: Dict) -> str:
1862
- """Generate response based on query type"""
1863
  if query_type == 'gpa':
1864
  return self._generate_gpa_response(profile)
1865
  elif query_type == 'study':
@@ -1876,7 +1329,6 @@ class EnhancedTeachingAssistant:
1876
  return await self._generate_general_response(message, context)
1877
 
1878
  def _generate_gpa_response(self, profile: Dict) -> str:
1879
- """Generate response about GPA"""
1880
  gpa = profile.get('transcript', {}).get('student_info', {}).get('weighted_gpa', None)
1881
  if not gpa:
1882
  return "I couldn't find your GPA information. Please upload your transcript first."
@@ -1902,7 +1354,6 @@ class EnhancedTeachingAssistant:
1902
  return "\n\n".join(response)
1903
 
1904
  def _generate_study_response(self, profile: Dict) -> str:
1905
- """Generate study advice based on learning style"""
1906
  learning_style_match = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*",
1907
  profile.get('learning_style', ''))
1908
  if not learning_style_match:
@@ -1918,7 +1369,6 @@ class EnhancedTeachingAssistant:
1918
  if study_plan.get('study_strategies'):
1919
  response.extend([f"- {strategy}" for strategy in study_plan['study_strategies']])
1920
  else:
1921
- # Fallback if no study plan
1922
  if learning_style.lower() == 'visual':
1923
  response.extend([
1924
  "- Use color coding in your notes",
@@ -1951,18 +1401,15 @@ class EnhancedTeachingAssistant:
1951
  return "\n\n".join(response)
1952
 
1953
  def _generate_courses_response(self, profile: Dict) -> str:
1954
- """Generate response about current/past courses"""
1955
  transcript = profile.get('transcript', {})
1956
  if not transcript.get('course_history'):
1957
  return "I couldn't find your course information. Please upload your transcript first."
1958
 
1959
- # Get current courses (in progress)
1960
  current_courses = [
1961
  course for course in transcript['course_history']
1962
  if course.get('status', '').lower() == 'in progress'
1963
  ]
1964
 
1965
- # Get past completed courses
1966
  completed_courses = [
1967
  course for course in transcript['course_history']
1968
  if course.get('status', '').lower() == 'completed'
@@ -1972,7 +1419,7 @@ class EnhancedTeachingAssistant:
1972
 
1973
  if current_courses:
1974
  response.append("**Your Current Courses:**")
1975
- for course in current_courses[:5]: # Limit to 5 courses
1976
  response.append(
1977
  f"- {course.get('description', 'Unknown')} "
1978
  f"({course.get('course_code', '')})"
@@ -1982,7 +1429,7 @@ class EnhancedTeachingAssistant:
1982
 
1983
  if completed_courses:
1984
  response.append("\n**Recently Completed Courses:**")
1985
- for course in completed_courses[:5]: # Limit to 5 courses
1986
  grade = course.get('grade_earned', '')
1987
  if grade:
1988
  response.append(
@@ -1992,7 +1439,6 @@ class EnhancedTeachingAssistant:
1992
  else:
1993
  response.append(f"- {course.get('description', 'Unknown')}")
1994
 
1995
- # Add rigor analysis
1996
  rigor = academic_analyzer.analyze_course_rigor(transcript)
1997
  if rigor['rating']:
1998
  response.append(f"\n**Course Rigor Analysis:** {rigor['rating']}")
@@ -2003,7 +1449,6 @@ class EnhancedTeachingAssistant:
2003
  return "\n".join(response)
2004
 
2005
  def _generate_college_response(self, profile: Dict) -> str:
2006
- """Generate college recommendations"""
2007
  recommendations = academic_analyzer.generate_college_recommendations(profile.get('transcript', {}))
2008
 
2009
  response = ["**College Recommendations Based on Your Profile:**"]
@@ -2031,7 +1476,6 @@ class EnhancedTeachingAssistant:
2031
  return "\n".join(response)
2032
 
2033
  def _generate_planning_response(self, profile: Dict) -> str:
2034
- """Generate study/schedule planning advice"""
2035
  study_plan = profile.get('study_plan', {})
2036
 
2037
  response = ["**Study Planning Advice:**"]
@@ -2041,7 +1485,7 @@ class EnhancedTeachingAssistant:
2041
  for day, activities in study_plan['weekly_schedule'].items():
2042
  if activities:
2043
  response.append(f"\n**{day}:**")
2044
- for activity in activities[:2]: # Show 2 activities per day max
2045
  response.append(
2046
  f"- {activity.get('course', 'Course')}: "
2047
  f"{activity.get('duration', '45-60 minutes')}"
@@ -2059,23 +1503,20 @@ class EnhancedTeachingAssistant:
2059
  return "\n".join(response)
2060
 
2061
  def _generate_resources_response(self, profile: Dict) -> str:
2062
- """Generate resource recommendations"""
2063
  study_plan = profile.get('study_plan', {})
2064
  transcript = profile.get('transcript', {})
2065
 
2066
  response = ["**Recommended Learning Resources:**"]
2067
 
2068
- # General resources
2069
  if study_plan.get('resource_recommendations'):
2070
  response.extend([f"- {resource}" for resource in study_plan['resource_recommendations'][:3]])
2071
  else:
2072
  response.extend([
2073
- "- Khan Academy (free lessons on many subjects)",
2074
- "- Quizlet (flashcards and study tools)",
2075
  "- Wolfram Alpha for math help"
2076
  ])
2077
 
2078
- # Subject-specific resources
2079
  current_courses = [
2080
  course for course in transcript.get('course_history', [])
2081
  if course.get('status', '').lower() == 'in progress'
@@ -2083,7 +1524,7 @@ class EnhancedTeachingAssistant:
2083
 
2084
  if current_courses:
2085
  response.append("\n**Course-Specific Resources:**")
2086
- for course in current_courses[:2]: # Limit to 2 courses
2087
  course_name = course.get('description', 'your course')
2088
  if 'MATH' in course_name.upper():
2089
  response.append(f"- For {course_name}: Desmos Graphing Calculator, Art of Problem Solving")
@@ -2095,7 +1536,6 @@ class EnhancedTeachingAssistant:
2095
  return "\n".join(response)
2096
 
2097
  async def _generate_general_response(self, message: str, context: str) -> str:
2098
- """Generate response using the language model"""
2099
  if not self.model or not self.tokenizer:
2100
  return "I'm still loading my knowledge base. Please try again in a moment."
2101
 
@@ -2104,7 +1544,6 @@ class EnhancedTeachingAssistant:
2104
 
2105
  inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
2106
 
2107
- # Generate response with more controlled parameters
2108
  outputs = self.model.generate(
2109
  **inputs,
2110
  max_new_tokens=200,
@@ -2116,10 +1555,8 @@ class EnhancedTeachingAssistant:
2116
 
2117
  response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
2118
 
2119
- # Extract just the assistant's response
2120
  response = response[len(prompt):].strip()
2121
 
2122
- # Clean up any incomplete sentences
2123
  if response and response[-1] not in {'.', '!', '?'}:
2124
  last_period = response.rfind('.')
2125
  if last_period > 0:
@@ -2131,7 +1568,6 @@ class EnhancedTeachingAssistant:
2131
  return "I encountered an error generating a response. Please try again."
2132
 
2133
  def _update_context(self, message: str, history: List[List[Union[str, None]]]) -> None:
2134
- """Update conversation context"""
2135
  self.context_history.append({"role": "user", "content": message})
2136
 
2137
  if history:
@@ -2141,290 +1577,97 @@ class EnhancedTeachingAssistant:
2141
  if h[1]:
2142
  self.context_history.append({"role": "assistant", "content": h[1]})
2143
 
2144
- # Trim to max context length
2145
  self.context_history = self.context_history[-(self.max_context_length * 2):]
2146
 
2147
- # Initialize enhanced teaching assistant
2148
  teaching_assistant = EnhancedTeachingAssistant()
2149
 
2150
- # ========== STUDY CALENDAR INTEGRATION ==========
2151
- class StudyCalendar:
2152
- def __init__(self):
2153
- self.calendar_events = {}
2154
 
2155
- def generate_study_calendar(self, profile: Dict, start_date: str = None, weeks: int = 4) -> Dict:
2156
- """Generate a study calendar for the given profile"""
2157
- try:
2158
- if not start_date:
2159
- start_date = datetime.date.today().isoformat()
2160
-
2161
- start_date = datetime.date.fromisoformat(start_date)
2162
- study_plan = profile.get('study_plan', {})
2163
-
2164
- calendar = {
2165
- 'start_date': start_date.isoformat(),
2166
- 'end_date': (start_date + datetime.timedelta(weeks=weeks)).isoformat(),
2167
- 'events': [],
2168
- 'exams': [],
2169
- 'assignments': []
2170
- }
2171
-
2172
- # Add study sessions from the study plan
2173
- if study_plan.get('weekly_schedule'):
2174
- for day_offset in range(weeks * 7):
2175
- current_date = start_date + datetime.timedelta(days=day_offset)
2176
- day_name = calendar.day_name[current_date.weekday()]
2177
-
2178
- if day_name in study_plan['weekly_schedule']:
2179
- for session in study_plan['weekly_schedule'][day_name]:
2180
- calendar['events'].append({
2181
- 'date': current_date.isoformat(),
2182
- 'title': f"Study {session.get('course', '')}",
2183
- 'description': "\n".join(session.get('activities', [])),
2184
- 'duration': session.get('duration', '45-60 minutes'),
2185
- 'type': 'study'
2186
- })
2187
-
2188
- # Add exam dates from transcript (if available)
2189
- transcript = profile.get('transcript', {})
2190
- if transcript.get('course_history'):
2191
- for course in transcript['course_history']:
2192
- if course.get('status', '').lower() == 'in progress':
2193
- # Simulate some exam dates (in a real app, these would come from the school calendar)
2194
- midterm_date = (start_date + datetime.timedelta(weeks=2)).isoformat()
2195
- final_date = (start_date + datetime.timedelta(weeks=weeks - 1)).isoformat()
2196
-
2197
- calendar['exams'].append({
2198
- 'date': midterm_date,
2199
- 'title': f"{course.get('description', 'Course')} Midterm",
2200
- 'course': course.get('description', ''),
2201
- 'type': 'exam'
2202
- })
2203
-
2204
- calendar['exams'].append({
2205
- 'date': final_date,
2206
- 'title': f"{course.get('description', 'Course')} Final",
2207
- 'course': course.get('description', ''),
2208
- 'type': 'exam'
2209
- })
2210
-
2211
- return calendar
2212
- except Exception as e:
2213
- logger.error(f"Error generating calendar: {str(e)}")
2214
- return {
2215
- 'start_date': datetime.date.today().isoformat(),
2216
- 'end_date': (datetime.date.today() + datetime.timedelta(weeks=4)).isoformat(),
2217
- 'events': [],
2218
- 'exams': [],
2219
- 'assignments': []
2220
- }
2221
 
2222
- def create_calendar_visualization(self, calendar_data: Dict) -> Optional[plt.Figure]:
2223
- """Create a visualization of the study calendar"""
2224
- try:
2225
- import matplotlib.pyplot as plt
2226
- from matplotlib.patches import Rectangle
2227
-
2228
- # Prepare data
2229
- start_date = datetime.date.fromisoformat(calendar_data['start_date'])
2230
- end_date = datetime.date.fromisoformat(calendar_data['end_date'])
2231
- days = (end_date - start_date).days + 1
2232
-
2233
- # Create figure
2234
- fig, ax = plt.subplots(figsize=(12, 6))
2235
-
2236
- # Draw week grid
2237
- for i in range(0, days, 7):
2238
- ax.add_patch(Rectangle((i, 0), 7, 1, color='#f5f5f5'))
2239
-
2240
- # Add study events
2241
- for event in calendar_data['events']:
2242
- event_date = datetime.date.fromisoformat(event['date'])
2243
- day_offset = (event_date - start_date).days
2244
- ax.add_patch(Rectangle((day_offset, 0.7), 1, 0.3, color='#4CAF50'))
2245
-
2246
- # Add exams
2247
- for exam in calendar_data['exams']:
2248
- exam_date = datetime.date.fromisoformat(exam['date'])
2249
- day_offset = (exam_date - start_date).days
2250
- ax.add_patch(Rectangle((day_offset, 0.3), 1, 0.3, color='#F44336'))
2251
-
2252
- # Configure axes
2253
- ax.set_xlim(0, days)
2254
- ax.set_ylim(0, 1)
2255
- ax.set_xticks(range(0, days, 7))
2256
- ax.set_xticklabels([(start_date + datetime.timedelta(days=x)).strftime('%b %d')
2257
- for x in range(0, days, 7)])
2258
- ax.set_yticks([0.5])
2259
- ax.set_yticklabels(['Study Calendar'])
2260
-
2261
- # Add legend
2262
- ax.add_patch(Rectangle((days-5, 0.7), 1, 0.3, color='#4CAF50'))
2263
- ax.text(days-3.5, 0.85, 'Study Sessions', va='center')
2264
- ax.add_patch(Rectangle((days-5, 0.3), 1, 0.3, color='#F44336'))
2265
- ax.text(days-3.5, 0.45, 'Exams', va='center')
2266
-
2267
- plt.title(f"Study Calendar: {start_date.strftime('%b %d')} to {end_date.strftime('%b %d')}")
2268
- plt.tight_layout()
2269
-
2270
- return fig
2271
- except Exception as e:
2272
- logger.error(f"Error creating calendar visualization: {str(e)}")
2273
- return None
2274
 
2275
- # Initialize study calendar
2276
- study_calendar = StudyCalendar()
2277
 
2278
- # ========== GOAL TRACKING SYSTEM ==========
2279
- class GoalTracker:
2280
- def __init__(self):
2281
- self.goals = {}
2282
-
2283
- def add_goal(self, profile_name: str, goal_type: str, description: str,
2284
- target_date: str, target_value: float = None) -> bool:
2285
- """Add a new goal for the student"""
2286
- try:
2287
- goal_id = hashlib.sha256(f"{profile_name}{goal_type}{description}{time.time()}".encode()).hexdigest()[:16]
2288
-
2289
- self.goals[goal_id] = {
2290
- 'profile_name': profile_name,
2291
- 'type': goal_type,
2292
- 'description': description,
2293
- 'target_date': target_date,
2294
- 'target_value': target_value,
2295
- 'created': time.time(),
2296
- 'progress': [],
2297
- 'completed': False
2298
- }
2299
-
2300
- return True
2301
- except Exception as e:
2302
- logger.error(f"Error adding goal: {str(e)}")
2303
- return False
2304
-
2305
- def update_goal_progress(self, goal_id: str, progress_value: float, notes: str = "") -> bool:
2306
- """Update progress toward a goal"""
2307
- try:
2308
- if goal_id not in self.goals:
2309
- return False
2310
-
2311
- self.goals[goal_id]['progress'].append({
2312
- 'date': time.time(),
2313
- 'value': progress_value,
2314
- 'notes': notes
2315
- })
2316
-
2317
- # Check if goal is completed
2318
- if self.goals[goal_id].get('target_value') is not None:
2319
- if progress_value >= self.goals[goal_id]['target_value']:
2320
- self.goals[goal_id]['completed'] = True
2321
-
2322
- return True
2323
- except Exception as e:
2324
- logger.error(f"Error updating goal: {str(e)}")
2325
- return False
2326
 
2327
- def get_goals(self, profile_name: str) -> List[Dict]:
2328
- """Get all goals for a student"""
2329
- return [
2330
- {**goal, 'id': goal_id}
2331
- for goal_id, goal in self.goals.items()
2332
- if goal['profile_name'] == profile_name
2333
- ]
2334
 
2335
- def create_goal_visualization(self, goals: List[Dict]) -> Optional[plt.Figure]:
2336
- """Create a visualization of goal progress"""
2337
- try:
2338
- import matplotlib.pyplot as plt
2339
-
2340
- if not goals:
2341
- return None
2342
-
2343
- # Prepare data
2344
- goal_names = [goal['description'][:20] + ('...' if len(goal['description']) > 20 else '')
2345
- for goal in goals]
2346
- progress_values = [
2347
- goal['progress'][-1]['value'] if goal['progress'] else 0
2348
- for goal in goals
2349
- ]
2350
- target_values = [
2351
- goal['target_value'] if goal['target_value'] is not None else progress_values[i]
2352
- for i, goal in enumerate(goals)
2353
- ]
2354
-
2355
- # Create figure
2356
- fig, ax = plt.subplots(figsize=(10, 6))
2357
-
2358
- # Plot bars
2359
- x = range(len(goals))
2360
- bar_width = 0.35
2361
-
2362
- progress_bars = ax.bar(
2363
- [i - bar_width/2 for i in x],
2364
- progress_values,
2365
- bar_width,
2366
- label='Current Progress',
2367
- color='#4CAF50'
2368
- )
2369
-
2370
- target_bars = ax.bar(
2371
- [i + bar_width/2 for i in x],
2372
- target_values,
2373
- bar_width,
2374
- label='Target',
2375
- color='#2196F3'
2376
- )
2377
-
2378
- # Add labels and title
2379
- ax.set_xlabel('Goals')
2380
- ax.set_ylabel('Progress')
2381
- ax.set_title('Goal Progress Tracking')
2382
- ax.set_xticks(x)
2383
- ax.set_xticklabels(goal_names, rotation=45, ha='right')
2384
- ax.legend()
2385
-
2386
- # Add value labels
2387
- for bar in progress_bars:
2388
- height = bar.get_height()
2389
- ax.annotate(f'{height:.1f}',
2390
- xy=(bar.get_x() + bar.get_width() / 2, height),
2391
- xytext=(0, 3),
2392
- textcoords="offset points",
2393
- ha='center', va='bottom')
2394
-
2395
- for bar in target_bars:
2396
- height = bar.get_height()
2397
- ax.annotate(f'{height:.1f}',
2398
- xy=(bar.get_x() + bar.get_width() / 2, height),
2399
- xytext=(0, 3),
2400
- textcoords="offset points",
2401
- ha='center', va='bottom')
2402
-
2403
- plt.tight_layout()
2404
- return fig
2405
- except Exception as e:
2406
- logger.error(f"Error creating goal visualization: {str(e)}")
2407
- return None
2408
 
2409
- # Initialize goal tracker
2410
- goal_tracker = GoalTracker()
 
 
 
 
 
 
 
 
 
 
 
 
2411
 
2412
- # ========== ENHANCED GRADIO INTERFACE ==========
2413
  def create_enhanced_interface():
2414
  with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
2415
  session_token = gr.State(value=generate_session_token())
2416
  profile_manager.set_session(session_token.value)
2417
 
2418
  tab_completed = gr.State({
2419
- 0: False, # Transcript Upload
2420
- 1: False, # Learning Style Quiz
2421
- 2: False, # Personal Questions
2422
- 3: False, # Save & Review
2423
- 4: False, # AI Assistant
2424
- 5: False # Goals & Planning
2425
  })
2426
 
2427
- # Custom CSS with enhanced styling
2428
  app.css = """
2429
  .gradio-container {
2430
  max-width: 1200px !important;
@@ -2537,7 +1780,6 @@ def create_enhanced_interface():
2537
  border-left: 4px solid #2196F3;
2538
  }
2539
 
2540
- /* Dark mode styles */
2541
  .dark .tab-content {
2542
  background-color: #2d2d2d !important;
2543
  border-color: #444 !important;
@@ -2579,7 +1821,6 @@ def create_enhanced_interface():
2579
  }
2580
  """
2581
 
2582
- # Header with improved layout
2583
  with gr.Row():
2584
  with gr.Column(scale=4):
2585
  gr.Markdown("""
@@ -2590,7 +1831,6 @@ def create_enhanced_interface():
2590
  with gr.Column(scale=1):
2591
  dark_mode = gr.Checkbox(label="Dark Mode", value=False)
2592
 
2593
- # Navigation buttons with icons
2594
  with gr.Row():
2595
  with gr.Column(scale=1, min_width=100):
2596
  step1 = gr.Button("📄 1. Transcript", elem_classes="incomplete-tab")
@@ -2607,9 +1847,7 @@ def create_enhanced_interface():
2607
 
2608
  nav_message = gr.HTML(visible=False)
2609
 
2610
- # Main tabs
2611
  with gr.Tabs(visible=True) as tabs:
2612
- # ===== TAB 1: TRANSCRIPT UPLOAD =====
2613
  with gr.Tab("Transcript", id=0):
2614
  with gr.Row():
2615
  with gr.Column(scale=1):
@@ -2650,15 +1888,12 @@ def create_enhanced_interface():
2650
 
2651
  def process_and_visualize(file_obj, tab_status):
2652
  try:
2653
- # Parse transcript with enhanced parser
2654
- parsed_data = transcript_parser.parse_transcript(file_obj.name, os.path.splitext(file_obj.name)[1].lower())
2655
 
2656
- # Generate analyses
2657
  gpa_analysis = academic_analyzer.analyze_gpa(parsed_data)
2658
  grad_status = academic_analyzer.analyze_graduation_status(parsed_data)
2659
  college_recs = academic_analyzer.generate_college_recommendations(parsed_data)
2660
 
2661
- # Format results
2662
  results = [
2663
  f"## 📊 GPA Analysis",
2664
  f"**Rating:** {gpa_analysis['rating']}",
@@ -2688,7 +1923,6 @@ def create_enhanced_interface():
2688
  results.append("\n**Improvement Tips:**")
2689
  results.extend([f"- {tip}" for tip in gpa_analysis['improvement_tips']])
2690
 
2691
- # Update visualizations
2692
  viz_updates = [
2693
  gr.update(visible=data_visualizer.create_gpa_visualization(parsed_data) is not None),
2694
  gr.update(visible=data_visualizer.create_requirements_visualization(parsed_data) is not None),
@@ -2696,7 +1930,6 @@ def create_enhanced_interface():
2696
  gr.update(visible=data_visualizer.create_course_rigor_visualization(parsed_data) is not None)
2697
  ]
2698
 
2699
- # Update tab completion status
2700
  tab_status[0] = True
2701
 
2702
  return "\n".join(results), parsed_data, *viz_updates, tab_status
@@ -2717,7 +1950,6 @@ def create_enhanced_interface():
2717
  outputs=step2
2718
  )
2719
 
2720
- # ===== TAB 2: LEARNING STYLE QUIZ =====
2721
  with gr.Tab("Learning Style Quiz", id=1):
2722
  with gr.Column():
2723
  gr.Markdown("### 📝 Step 2: Discover Your Learning Style")
@@ -2783,7 +2015,6 @@ def create_enhanced_interface():
2783
  outputs=progress
2784
  )
2785
 
2786
- # ===== TAB 3: PERSONAL QUESTIONS =====
2787
  with gr.Tab("Personal Profile", id=2):
2788
  with gr.Row():
2789
  with gr.Column(scale=1):
@@ -2829,7 +2060,6 @@ def create_enhanced_interface():
2829
  outputs=[tab_completed, step3, step4, save_confirmation]
2830
  )
2831
 
2832
- # ===== TAB 4: SAVE & REVIEW =====
2833
  with gr.Tab("Save Profile", id=3):
2834
  with gr.Row():
2835
  with gr.Column(scale=1):
@@ -2929,12 +2159,10 @@ def create_enhanced_interface():
2929
  ]
2930
  )
2931
 
2932
- # ===== TAB 5: AI ASSISTANT =====
2933
  with gr.Tab("AI Assistant", id=4):
2934
  gr.Markdown("## 💬 Your Personalized Learning Assistant")
2935
  gr.Markdown("Ask me anything about studying, your courses, grades, or learning strategies.")
2936
 
2937
- # Create custom chatbot interface
2938
  chatbot = gr.Chatbot(height=500)
2939
  msg = gr.Textbox(label="Your Message")
2940
  clear = gr.Button("Clear")
@@ -2947,7 +2175,6 @@ def create_enhanced_interface():
2947
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
2948
  clear.click(lambda: None, None, chatbot, queue=False)
2949
 
2950
- # ===== TAB 6: GOALS & PLANNING =====
2951
  with gr.Tab("Goals & Planning", id=5):
2952
  with gr.Row():
2953
  with gr.Column(scale=1):
@@ -2976,7 +2203,6 @@ def create_enhanced_interface():
2976
  calendar_output = gr.HTML()
2977
  calendar_viz = gr.Plot(label="Calendar Visualization", visible=False)
2978
 
2979
- # Show/hide target value based on goal type
2980
  goal_type.change(
2981
  fn=lambda gt: gr.update(visible=gt in ["GPA Improvement", "Test Score"]),
2982
  inputs=goal_type,
@@ -3029,7 +2255,6 @@ def create_enhanced_interface():
3029
 
3030
  calendar = study_calendar.generate_study_calendar(profile, start_date.isoformat())
3031
 
3032
- # Create HTML display
3033
  calendar_html = []
3034
  current_date = datetime.date.fromisoformat(calendar['start_date'])
3035
  end_date = datetime.date.fromisoformat(calendar['end_date'])
@@ -3073,7 +2298,6 @@ def create_enhanced_interface():
3073
  gr.update(visible=study_calendar.create_calendar_visualization(calendar) is not None)
3074
  )
3075
 
3076
- # Add goal functionality
3077
  add_goal_btn.click(
3078
  fn=lambda gt, desc, date, val: (
3079
  goal_tracker.add_goal(name.value, gt, desc, date, val),
@@ -3091,16 +2315,13 @@ def create_enhanced_interface():
3091
  outputs=[goals_output, goal_viz]
3092
  )
3093
 
3094
- # Generate calendar functionality
3095
  generate_calendar_btn.click(
3096
  fn=lambda date: update_calendar_display(name.value, date),
3097
  inputs=calendar_start_date,
3098
  outputs=[calendar_output, calendar_viz]
3099
  )
3100
 
3101
- # Navigation logic
3102
  def navigate_to_tab(tab_index: int, tab_completed_status: dict):
3103
- # Check if all previous tabs are completed
3104
  for i in range(tab_index):
3105
  if not tab_completed_status.get(i, False):
3106
  messages = [
@@ -3111,7 +2332,7 @@ def create_enhanced_interface():
3111
  "Please complete the previous steps first"
3112
  ]
3113
  return (
3114
- gr.Tabs(selected=i), # Go to first incomplete tab
3115
  gr.update(
3116
  value=f"<div class='error-message'>β›” {messages[i]}</div>",
3117
  visible=True
@@ -3151,7 +2372,6 @@ def create_enhanced_interface():
3151
  outputs=[tabs, nav_message]
3152
  )
3153
 
3154
- # Dark mode toggle
3155
  def toggle_dark_mode(dark):
3156
  return gr.themes.Soft(primary_hue="blue", secondary_hue="gray") if not dark else gr.themes.Soft(primary_hue="blue", secondary_hue="gray", neutral_hue="slate")
3157
 
@@ -3161,7 +2381,6 @@ def create_enhanced_interface():
3161
  outputs=None
3162
  )
3163
 
3164
- # Load model on startup
3165
  app.load(fn=lambda: model_loader.load_model(), outputs=[])
3166
 
3167
  return app
 
38
  # Enhanced Configuration
39
  PROFILES_DIR = "student_profiles"
40
  ALLOWED_FILE_TYPES = [".pdf", ".png", ".jpg", ".jpeg"]
41
+ MAX_FILE_SIZE_MB = 10
42
  MIN_AGE = 5
43
  MAX_AGE = 120
44
  SESSION_TOKEN_LENGTH = 32
45
  HF_TOKEN = os.getenv("HF_TOKEN")
46
  ENCRYPTION_KEY = os.getenv("ENCRYPTION_KEY", Fernet.generate_key().decode())
47
+ SESSION_TIMEOUT = 3600 * 3
48
  MAX_CONTEXT_HISTORY = 10
49
  MAX_PROFILE_LOAD_ATTEMPTS = 3
50
 
51
+ # Initialize logging
52
  logging.basicConfig(
53
  level=logging.INFO,
54
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 
59
  )
60
  logger = logging.getLogger(__name__)
61
 
62
+ # Model configuration
63
+ MODEL_NAME = "deepseek-ai/deepseek-llm-7b"
64
 
65
+ # Initialize Hugging Face API
66
  if HF_TOKEN:
67
  hf_api = None
68
  for attempt in range(3):
 
73
  break
74
  except Exception as e:
75
  logger.error(f"Attempt {attempt + 1} failed to initialize Hugging Face API: {str(e)}")
76
+ time.sleep(2 ** attempt)
77
 
78
  # ========== LEARNING STYLE QUIZ ==========
79
  class LearningStyleQuiz:
 
119
  'kinesthetic': 0
120
  }
121
 
 
122
  for answer in answers:
123
  if answer.startswith("See") or answer.startswith("Draw") or answer.startswith("Watch") or "diagram" in answer.lower():
124
  style_counts['visual'] += 1
 
132
  primary_style = max(style_counts, key=style_counts.get)
133
  secondary_styles = sorted(style_counts.items(), key=lambda x: x[1], reverse=True)[1:3]
134
 
 
135
  result = [
136
  "## 🎯 Your Learning Style Results",
137
  f"Your primary learning style is **{primary_style.capitalize()}**",
 
181
  # Initialize learning style quiz
182
  learning_style_quiz = LearningStyleQuiz()
183
 
184
+ # ========== MODEL LOADER ==========
185
  class ModelLoader:
186
  def __init__(self):
187
  self.model = None
 
194
  self.max_retries = 3
195
 
196
  def load_model(self, progress: gr.Progress = None) -> Tuple[Optional[AutoModelForCausalLM], Optional[AutoTokenizer]]:
 
197
  if self.loaded:
198
  return self.model, self.tokenizer
199
 
 
209
  if progress:
210
  progress(0.1, desc="Initializing model environment...")
211
 
 
212
  if self.device == "cuda":
213
  torch.cuda.empty_cache()
214
  torch.cuda.reset_peak_memory_stats()
 
216
  if progress:
217
  progress(0.2, desc="Loading tokenizer...")
218
 
 
219
  tokenizer = None
220
  for attempt in range(3):
221
  try:
 
234
  if progress:
235
  progress(0.5, desc="Loading model (this may take a few minutes)...")
236
 
 
237
  model_kwargs = {
238
  "trust_remote_code": True,
239
  "torch_dtype": torch.float16 if self.device == "cuda" else torch.float32,
 
242
  "offload_folder": "offload"
243
  }
244
 
 
245
  if torch.cuda.device_count() > 1:
246
  model_kwargs["max_memory"] = {i: "20GiB" for i in range(torch.cuda.device_count())}
247
 
 
268
  logger.warning(f"Model loading attempt {attempt + 1} failed: {str(e)}")
269
  time.sleep(2 ** attempt)
270
 
 
271
  if progress:
272
  progress(0.8, desc="Verifying model...")
273
  test_input = tokenizer("Test", return_tensors="pt").to(self.device)
 
299
  def get_model_and_tokenizer():
300
  return model_loader.load_model()
301
 
302
+ # ========== TRANSCRIPT PARSER ==========
303
class MiamiDadeTranscriptParser:
    """Regex-based parser for Miami-Dade County transcript PDFs.

    The extracted PDF text is scanned with three compiled patterns that
    recognise pipe-delimited rows: the student header row, the graduation
    requirement rows and the course history rows.
    """

    def __init__(self):
        # Header row: 7-digit id, name, current grade, year of graduation,
        # weighted GPA, community service date, total credits earned.
        self.student_info_pattern = re.compile(
            r"(\d{7}) - (.*?)\s*\|\s*Current Grade:\s*(\d+)\s*\|\s*YOG\s*(\d{4})"
            r"\s*\|\s*Weighted GPA\s*([\d.]+)\s*\|\s*Comm Serv Date\s*(\d{2}/\d{2}/\d{4})"
            r"\s*\|\s*Total Credits Earned\s*([\d.]+)"
        )

        # Requirement row: code, description, required, waived, completed,
        # percent complete (terminated by a literal "%").
        self.requirement_pattern = re.compile(
            r"([A-Z]-[A-Za-z ]+)\s*\|\s*([^|]+)\|\s*([\d.]+)\s*\|\s*([\d.]+)\s*\|\s*([\d.]+)\s*\|\s*([^|]+)%"
        )

        # Course row: ten capture groups; the last one is either a numeric
        # credit value or the literal "inProgress".
        self.course_pattern = re.compile(
            r"([A-Z]-[A-Za-z ]+)\s*\|\s*(\d{4}-\d{4})\s*\|\s*(\d{2})\s*\|\s*([A-Z0-9]+)\s*\|\s*([^|]+)\|"
            r"\s*([A-Z0-9])\s*\|\s*(\d+)\s*\|\s*([A-Z])\s*\|\s*([A-Z])\s*\|\s*([\d.]+|inProgress)"
        )

    def parse_transcript(self, file_path: str) -> Dict:
        """Parse Miami-Dade County transcript PDF.

        Args:
            file_path: Path of the PDF file to open with pdfplumber.

        Returns:
            A dict with the keys 'student_info', 'requirements' and
            'course_history'.
        """
        with pdfplumber.open(file_path) as pdf:
            # extract_text() may yield None for a page without extractable
            # text (e.g. a scanned image); substitute "" so join() cannot
            # fail with a TypeError.
            text = "\n".join(page.extract_text() or "" for page in pdf.pages)

        parsed_data = {
            'student_info': self._parse_student_info(text),
            'requirements': self._parse_requirements(text),
            'course_history': self._parse_courses(text)
        }

        return parsed_data

    def _parse_student_info(self, text: str) -> Dict:
        """Extract student information; returns {} when no header row matches."""
        match = self.student_info_pattern.search(text)
        if not match:
            return {}

        return {
            'id': match.group(1),
            'name': match.group(2).strip(),
            'grade': match.group(3),
            'year_of_graduation': match.group(4),
            'weighted_gpa': float(match.group(5)),
            'community_service_date': match.group(6),
            'total_credits': float(match.group(7)),
            'district': 'Miami-Dade'
        }

    def _parse_requirements(self, text: str) -> Dict:
        """Parse graduation requirements section into a dict keyed by requirement code."""
        requirements = {}
        for match in self.requirement_pattern.finditer(text):
            requirements[match.group(1).strip()] = {
                'description': match.group(2).strip(),
                'required': float(match.group(3)),
                'waived': float(match.group(4)),
                'completed': float(match.group(5)),
                'percent_complete': float(match.group(6))
            }
        return requirements

    def _parse_courses(self, text: str) -> List[Dict]:
        """Parse course history section into a list of per-course dicts.

        A row whose credit column reads "inProgress" gets credits 0 and
        status 'In Progress'; every other row is 'Completed'.
        """
        courses = []
        for match in self.course_pattern.finditer(text):
            # The credit column is the tenth capture group. Group 9 is a
            # single letter, so converting it with float() could never work.
            credit_field = match.group(10)
            in_progress = credit_field == 'inProgress'
            courses.append({
                'requirement': match.group(1).strip(),
                'school_year': match.group(2),
                'grade_level': match.group(3),
                'course_code': match.group(4),
                'description': match.group(5).strip(),
                'term': match.group(6),
                'district_number': match.group(7),
                # NOTE(review): groups 8 and 9 are both single letters and only
                # group 8 is exposed; confirm against a real transcript which
                # one is the "included" flag and whether the other (possibly
                # the final grade) should be stored as well.
                'included': match.group(8),
                'credits': 0 if in_progress else float(credit_field),
                'status': 'In Progress' if in_progress else 'Completed'
            })
        return courses
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
 
381
+ # Initialize transcript parser
382
+ transcript_parser = MiamiDadeTranscriptParser()
383
 
384
+ # ========== ACADEMIC ANALYZER ==========
385
  class AcademicAnalyzer:
386
  def __init__(self):
387
  self.gpa_scale = {
 
397
  }
398
 
399
  def analyze_gpa(self, parsed_data: Dict) -> Dict:
 
400
  analysis = {
401
  'rating': '',
402
  'description': '',
 
454
  "Focus on fundamental study skills"
455
  ]
456
 
 
457
  if weighted_gpa > 0 and unweighted_gpa > 0:
458
  diff = weighted_gpa - unweighted_gpa
459
  if diff > 0.5:
 
473
  }
474
 
475
  def analyze_graduation_status(self, parsed_data: Dict) -> Dict:
 
476
  analysis = {
477
  'status': '',
478
  'completion_percentage': 0,
 
496
 
497
  analysis['completion_percentage'] = (total_completed / total_required) * 100 if total_required > 0 else 0
498
 
 
499
  analysis['missing_requirements'] = [
500
  {
501
  'code': code,
 
507
  if req and float(req.get('completed', 0)) < float(req.get('required', 0))
508
  ]
509
 
 
510
  current_grade = parsed_data.get('student_info', {}).get('grade', '')
511
  grad_year = parsed_data.get('student_info', {}).get('year_of_graduation', '')
512
 
 
526
  analysis['status'] = f"❌ You've only completed {analysis['completion_percentage']:.1f}% of requirements. Immediate action needed."
527
  analysis['on_track'] = False
528
 
 
529
  if current_grade and grad_year:
530
  remaining_credits = total_required - total_completed
531
  years_remaining = int(grad_year) - datetime.datetime.now().year - int(current_grade)
 
548
  }
549
 
550
  def analyze_course_rigor(self, parsed_data: Dict) -> Dict:
 
551
  analysis = {
552
  'advanced_courses': 0,
553
  'honors_courses': 0,
 
621
  }
622
 
623
  def generate_college_recommendations(self, parsed_data: Dict) -> Dict:
 
624
  recommendations = {
625
  'reach': [],
626
  'target': [],
 
630
  }
631
 
632
  try:
 
633
  weighted_gpa = float(parsed_data.get('student_info', {}).get('weighted_gpa', 0))
634
  rigor_analysis = self.analyze_course_rigor(parsed_data)
635
  service_hours = int(parsed_data.get('student_info', {}).get('community_service_hours', 0))
636
 
 
637
  if weighted_gpa >= 4.3 and rigor_analysis['advanced_courses'] >= 8 and service_hours >= 100:
638
  recommendations['reach'].extend([
639
  "Ivy League: Harvard, Yale, Princeton, Columbia, etc.",
 
681
  "Technical Schools"
682
  ])
683
 
 
684
  if weighted_gpa >= 4.0:
685
  recommendations['scholarships'].extend([
686
  "National Merit Scholarship",
 
700
  "First-Generation Student Programs"
701
  ])
702
 
 
703
  if weighted_gpa < 3.5:
704
  recommendations['improvement_areas'].append("Improve GPA through focused study and tutoring")
705
  if rigor_analysis['advanced_courses'] < 4:
 
718
  }
719
 
720
  def generate_study_plan(self, parsed_data: Dict, learning_style: str) -> Dict:
 
721
  plan = {
722
  'weekly_schedule': {},
723
  'study_strategies': [],
 
726
  }
727
 
728
  try:
 
729
  current_courses = [
730
  course for course in parsed_data.get('course_history', [])
731
  if course.get('status', '').lower() == 'in progress'
732
  ]
733
 
 
734
  days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
735
  for day in days:
736
  plan['weekly_schedule'][day] = []
737
 
738
+ study_blocks = 2
 
739
  if learning_style.lower() == 'visual':
740
  study_blocks = 3
741
  plan['study_strategies'].extend([
 
765
  "Use hands-on activities when possible"
766
  ])
767
 
 
768
  for i, course in enumerate(current_courses):
769
+ day_index = i % 5
770
  day = days[day_index]
771
  plan['weekly_schedule'][day].append({
772
  'course': course.get('description', 'Course'),
 
778
  ]
779
  })
780
 
 
781
  plan['time_management_tips'].extend([
782
  "Use the Pomodoro technique (25 min study, 5 min break)",
783
  "Prioritize assignments by due date and importance",
784
  "Schedule regular review sessions"
785
  ])
786
 
 
787
  plan['resource_recommendations'].extend([
788
  "Khan Academy for math and science",
789
  "Quizlet for flashcards",
 
802
  # Initialize academic analyzer
803
  academic_analyzer = AcademicAnalyzer()
804
 
805
+ # ========== DATA VISUALIZER ==========
806
  class DataVisualizer:
807
  def __init__(self):
808
  self.color_palette = {
 
817
  }
818
 
819
  def create_gpa_visualization(self, parsed_data: Dict):
 
820
  try:
821
  gpa_data = {
822
  "Type": ["Weighted GPA", "Unweighted GPA"],
 
843
  hover_data={"Type": True, "Value": ":.2f"}
844
  )
845
 
 
846
  fig.add_hline(y=4.0, line_dash="dot", line_color="green", annotation_text="Excellent", annotation_position="top left")
847
  fig.add_hline(y=3.0, line_dash="dot", line_color="orange", annotation_text="Good", annotation_position="top left")
848
  fig.add_hline(y=2.0, line_dash="dot", line_color="red", annotation_text="Minimum", annotation_position="top left")
 
869
  return None
870
 
871
  def create_requirements_visualization(self, parsed_data: Dict):
 
872
  try:
873
  req_data = []
874
  for code, req in parsed_data.get('requirements', {}).items():
 
927
  return None
928
 
929
  def create_credits_distribution_visualization(self, parsed_data: Dict):
 
930
  try:
931
  core_credits = sum(
932
  req['completed'] for req in parsed_data.get('requirements', {}).values()
933
+ if req and req.get('code', '').split('-')[0] in ['A', 'B', 'C', 'D']
934
  )
935
 
936
  elective_credits = sum(
937
  req['completed'] for req in parsed_data.get('requirements', {}).values()
938
+ if req and req.get('code', '').split('-')[0] in ['G', 'H']
939
  )
940
 
941
  other_credits = sum(
942
  req['completed'] for req in parsed_data.get('requirements', {}).values()
943
+ if req and req.get('code', '').split('-')[0] in ['E', 'F']
944
  )
945
 
946
  credit_values = [core_credits, elective_credits, other_credits]
 
988
  return None
989
 
990
  def create_course_rigor_visualization(self, parsed_data: Dict):
 
991
  try:
992
  rigor = academic_analyzer.analyze_course_rigor(parsed_data)
993
 
 
1036
  # Initialize visualizer
1037
  data_visualizer = DataVisualizer()
1038
 
1039
+ # ========== PROFILE MANAGER ==========
1040
  class EnhancedProfileManager:
1041
  def __init__(self):
1042
  self.profiles_dir = Path(PROFILES_DIR)
 
1058
  movie: str, movie_reason: str, show: str, show_reason: str,
1059
  book: str, book_reason: str, character: str, character_reason: str,
1060
  blog: str, study_plan: Dict = None) -> str:
 
1061
  try:
1062
  name = validate_name(name)
1063
  age = validate_age(age)
 
1071
  if not learning_style or "Your primary learning style is" not in learning_style:
1072
  raise ValueError("Please complete the learning style quiz first.")
1073
 
 
1074
  favorites = {
1075
  "movie": sanitize_input(movie),
1076
  "movie_reason": sanitize_input(movie_reason),
 
1082
  "character_reason": sanitize_input(character_reason)
1083
  }
1084
 
 
1085
  if not study_plan:
1086
  learning_style_match = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*", learning_style)
1087
  if learning_style_match:
 
1089
  transcript,
1090
  learning_style_match.group(1))
1091
 
 
1092
  data = {
1093
  "name": self.encryptor.encrypt(name),
1094
  "age": age,
1095
  "interests": self.encryptor.encrypt(sanitize_input(interests)),
1096
+ "transcript": transcript,
1097
  "learning_style": learning_style,
1098
  "favorites": favorites,
1099
  "blog": self.encryptor.encrypt(sanitize_input(blog)) if blog else "",
1100
  "study_plan": study_plan if study_plan else {},
1101
  "session_token": self.current_session,
1102
  "last_updated": time.time(),
1103
+ "version": "2.0"
1104
  }
1105
 
1106
  filepath = self.get_profile_path(name)
1107
 
 
1108
  temp_path = filepath.with_suffix('.tmp')
1109
  with open(temp_path, "w", encoding='utf-8') as f:
1110
  json.dump(data, f, indent=2, ensure_ascii=False)
1111
+ temp_path.replace(filepath)
1112
 
 
1113
  if HF_TOKEN and hf_api:
1114
  try:
1115
  hf_api.upload_file(
 
1129
  raise gr.Error(f"Couldn't save profile: {str(e)}")
1130
 
1131
  def load_profile(self, name: str = None, session_token: str = None) -> Dict:
 
1132
  for attempt in range(MAX_PROFILE_LOAD_ATTEMPTS):
1133
  try:
1134
  if session_token:
 
1143
  if name:
1144
  profile_file = self.get_profile_path(name)
1145
  if not profile_file.exists():
 
1146
  if HF_TOKEN and hf_api:
1147
  try:
1148
  hf_api.download_file(
 
1157
  else:
1158
  raise gr.Error(f"No profile found for {name}")
1159
  else:
 
1160
  profiles.sort(key=lambda x: x.stat().st_mtime, reverse=True)
1161
  profile_file = profiles[0]
1162
 
1163
  with open(profile_file, "r", encoding='utf-8') as f:
1164
  profile_data = json.load(f)
1165
 
 
1166
  if time.time() - profile_data.get('last_updated', 0) > SESSION_TIMEOUT:
1167
  raise gr.Error("Session expired. Please start a new session.")
1168
 
 
1169
  if profile_data.get('version', '1.0') == '2.0':
1170
  try:
1171
  profile_data['name'] = self.encryptor.decrypt(profile_data['name'])
 
1189
  time.sleep(0.5 * (attempt + 1))
1190
 
1191
  def list_profiles(self, session_token: str = None) -> List[str]:
 
1192
  if session_token:
1193
  profiles = list(self.profiles_dir.glob(f"*{session_token}_profile.json"))
1194
  else:
 
1213
  return profile_names
1214
 
1215
  def delete_profile(self, name: str, session_token: str = None) -> bool:
 
1216
  try:
1217
  profile_file = self.get_profile_path(name)
1218
  if not profile_file.exists():
1219
  return False
1220
 
 
1221
  with open(profile_file, "r", encoding='utf-8') as f:
1222
  data = json.load(f)
1223
  if session_token and data.get('session_token') != session_token:
1224
  return False
1225
 
 
1226
  profile_file.unlink()
1227
 
 
1228
  if HF_TOKEN and hf_api:
1229
  try:
1230
  hf_api.delete_file(
 
1240
  logger.error(f"Error deleting profile: {str(e)}")
1241
  return False
1242
 
1243
+ # Initialize profile manager
1244
  profile_manager = EnhancedProfileManager()
1245
 
1246
+ # ========== TEACHING ASSISTANT ==========
1247
  class EnhancedTeachingAssistant:
1248
  def __init__(self):
1249
  self.context_history = []
 
1252
  self.last_model_load_attempt = 0
1253
 
1254
  async def initialize_model(self):
 
1255
  if not self.model or not self.tokenizer:
1256
+ if time.time() - self.last_model_load_attempt > 3600:
1257
  self.model, self.tokenizer = get_model_and_tokenizer()
1258
  self.last_model_load_attempt = time.time()
1259
 
1260
  async def generate_response(self, message: str, history: List[List[Union[str, None]]], session_token: str) -> str:
 
1261
  try:
1262
  await self.initialize_model()
1263
 
 
1267
 
1268
  self._update_context(message, history)
1269
 
 
1270
  student_name = profile.get('name', 'Student')
1271
  gpa = profile.get('transcript', {}).get('student_info', {}).get('weighted_gpa', None)
1272
  learning_style = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*",
1273
  profile.get('learning_style', ''))
1274
  learning_style = learning_style.group(1) if learning_style else None
1275
 
 
1276
  context = f"You are an AI teaching assistant helping {student_name}. "
1277
  if gpa:
1278
  context += f"{student_name}'s current weighted GPA is {gpa}. "
1279
  if learning_style:
1280
  context += f"They are a {learning_style.lower()} learner. "
1281
 
 
1282
  if self.context_history:
1283
  context += "Recent conversation:\n"
1284
  for item in self.context_history[-self.max_context_length:]:
1285
  role = "Student" if item['role'] == 'user' else "Assistant"
1286
  context += f"{role}: {item['content']}\n"
1287
 
 
1288
  query_type = self._classify_query(message)
1289
  response = await self._generate_typed_response(query_type, message, context, profile)
1290
 
 
1295
  return "I encountered an error processing your request. Please try again."
1296
 
1297
  def _classify_query(self, message: str) -> str:
 
1298
  message_lower = message.lower()
1299
 
1300
  if any(word in message_lower for word in ['gpa', 'grade', 'average']):
 
1313
  return 'general'
1314
 
1315
  async def _generate_typed_response(self, query_type: str, message: str, context: str, profile: Dict) -> str:
 
1316
  if query_type == 'gpa':
1317
  return self._generate_gpa_response(profile)
1318
  elif query_type == 'study':
 
1329
  return await self._generate_general_response(message, context)
1330
 
1331
  def _generate_gpa_response(self, profile: Dict) -> str:
 
1332
  gpa = profile.get('transcript', {}).get('student_info', {}).get('weighted_gpa', None)
1333
  if not gpa:
1334
  return "I couldn't find your GPA information. Please upload your transcript first."
 
1354
  return "\n\n".join(response)
1355
 
1356
  def _generate_study_response(self, profile: Dict) -> str:
 
1357
  learning_style_match = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*",
1358
  profile.get('learning_style', ''))
1359
  if not learning_style_match:
 
1369
  if study_plan.get('study_strategies'):
1370
  response.extend([f"- {strategy}" for strategy in study_plan['study_strategies']])
1371
  else:
 
1372
  if learning_style.lower() == 'visual':
1373
  response.extend([
1374
  "- Use color coding in your notes",
 
1401
  return "\n\n".join(response)
1402
 
1403
  def _generate_courses_response(self, profile: Dict) -> str:
 
1404
  transcript = profile.get('transcript', {})
1405
  if not transcript.get('course_history'):
1406
  return "I couldn't find your course information. Please upload your transcript first."
1407
 
 
1408
  current_courses = [
1409
  course for course in transcript['course_history']
1410
  if course.get('status', '').lower() == 'in progress'
1411
  ]
1412
 
 
1413
  completed_courses = [
1414
  course for course in transcript['course_history']
1415
  if course.get('status', '').lower() == 'completed'
 
1419
 
1420
  if current_courses:
1421
  response.append("**Your Current Courses:**")
1422
+ for course in current_courses[:5]:
1423
  response.append(
1424
  f"- {course.get('description', 'Unknown')} "
1425
  f"({course.get('course_code', '')})"
 
1429
 
1430
  if completed_courses:
1431
  response.append("\n**Recently Completed Courses:**")
1432
+ for course in completed_courses[:5]:
1433
  grade = course.get('grade_earned', '')
1434
  if grade:
1435
  response.append(
 
1439
  else:
1440
  response.append(f"- {course.get('description', 'Unknown')}")
1441
 
 
1442
  rigor = academic_analyzer.analyze_course_rigor(transcript)
1443
  if rigor['rating']:
1444
  response.append(f"\n**Course Rigor Analysis:** {rigor['rating']}")
 
1449
  return "\n".join(response)
1450
 
1451
  def _generate_college_response(self, profile: Dict) -> str:
 
1452
  recommendations = academic_analyzer.generate_college_recommendations(profile.get('transcript', {}))
1453
 
1454
  response = ["**College Recommendations Based on Your Profile:**"]
 
1476
  return "\n".join(response)
1477
 
1478
  def _generate_planning_response(self, profile: Dict) -> str:
 
1479
  study_plan = profile.get('study_plan', {})
1480
 
1481
  response = ["**Study Planning Advice:**"]
 
1485
  for day, activities in study_plan['weekly_schedule'].items():
1486
  if activities:
1487
  response.append(f"\n**{day}:**")
1488
+ for activity in activities[:2]:
1489
  response.append(
1490
  f"- {activity.get('course', 'Course')}: "
1491
  f"{activity.get('duration', '45-60 minutes')}"
 
1503
  return "\n".join(response)
1504
 
1505
  def _generate_resources_response(self, profile: Dict) -> str:
 
1506
  study_plan = profile.get('study_plan', {})
1507
  transcript = profile.get('transcript', {})
1508
 
1509
  response = ["**Recommended Learning Resources:**"]
1510
 
 
1511
  if study_plan.get('resource_recommendations'):
1512
  response.extend([f"- {resource}" for resource in study_plan['resource_recommendations'][:3]])
1513
  else:
1514
  response.extend([
1515
+ "- Khan Academy for math and science",
1516
+ "- Quizlet for flashcards",
1517
  "- Wolfram Alpha for math help"
1518
  ])
1519
 
 
1520
  current_courses = [
1521
  course for course in transcript.get('course_history', [])
1522
  if course.get('status', '').lower() == 'in progress'
 
1524
 
1525
  if current_courses:
1526
  response.append("\n**Course-Specific Resources:**")
1527
+ for course in current_courses[:2]:
1528
  course_name = course.get('description', 'your course')
1529
  if 'MATH' in course_name.upper():
1530
  response.append(f"- For {course_name}: Desmos Graphing Calculator, Art of Problem Solving")
 
1536
  return "\n".join(response)
1537
 
1538
  async def _generate_general_response(self, message: str, context: str) -> str:
 
1539
  if not self.model or not self.tokenizer:
1540
  return "I'm still loading my knowledge base. Please try again in a moment."
1541
 
 
1544
 
1545
  inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
1546
 
 
1547
  outputs = self.model.generate(
1548
  **inputs,
1549
  max_new_tokens=200,
 
1555
 
1556
  response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
1557
 
 
1558
  response = response[len(prompt):].strip()
1559
 
 
1560
  if response and response[-1] not in {'.', '!', '?'}:
1561
  last_period = response.rfind('.')
1562
  if last_period > 0:
 
1568
  return "I encountered an error generating a response. Please try again."
1569
 
1570
  def _update_context(self, message: str, history: List[List[Union[str, None]]]) -> None:
 
1571
  self.context_history.append({"role": "user", "content": message})
1572
 
1573
  if history:
 
1577
  if h[1]:
1578
  self.context_history.append({"role": "assistant", "content": h[1]})
1579
 
 
1580
  self.context_history = self.context_history[-(self.max_context_length * 2):]
1581
 
1582
+ # Initialize teaching assistant
1583
  teaching_assistant = EnhancedTeachingAssistant()
1584
 
1585
+ # ========== UTILITY FUNCTIONS ==========
1586
class DataEncryptor:
    """Small wrapper that encrypts and decrypts text with a Fernet cipher."""

    def __init__(self, key: str):
        # Fernet is given the key as bytes; callers pass it as text.
        key_bytes = key.encode()
        self.cipher = Fernet(key_bytes)

    def encrypt(self, data: str) -> str:
        """Return the encrypted token for *data* as text."""
        token = self.cipher.encrypt(data.encode())
        return token.decode()

    def decrypt(self, encrypted_data: str) -> str:
        """Return the plaintext for a token produced by ``encrypt``."""
        plaintext = self.cipher.decrypt(encrypted_data.encode())
        return plaintext.decode()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1595
 
1596
+ encryptor = DataEncryptor(ENCRYPTION_KEY)
 
1597
 
1598
def generate_session_token() -> str:
    """Return a random token of SESSION_TOKEN_LENGTH ASCII letters and digits."""
    charset = string.ascii_letters + string.digits
    picked = []
    for _ in range(SESSION_TOKEN_LENGTH):
        # secrets.choice is used instead of random.choice for token material.
        picked.append(secrets.choice(charset))
    return ''.join(picked)
1601
+
1602
def sanitize_input(text: str) -> str:
    """Return *text* with markup and unusual characters removed, HTML-escaped.

    Steps, in order:
      1. drop anything that looks like an HTML tag (``<...>``);
      2. drop every character outside word characters, whitespace and
         ``- . , ! ? @ # $ % ^ & * ( ) + =``;
      3. trim surrounding whitespace and HTML-escape what is left.

    Args:
        text: Raw user input; ``None`` and ``""`` are both accepted.

    Returns:
        The cleaned string, or ``""`` for empty input.
    """
    if not text:
        return ""
    # Tags must be removed BEFORE escaping: once "<" has become "&lt;" the tag
    # pattern can no longer match, and the character filter would then strip
    # the ";" and leave broken entities such as "&ltb&gt" in the output.
    cleaned = re.sub(r'<[^>]*>', '', text.strip())
    cleaned = re.sub(r'[^\w\s\-.,!?@#\$%^&*()+=]', '', cleaned)
    # Escape last so the entity produced for "&" keeps its ";".
    return html.escape(cleaned.strip())
1609
+
1610
def validate_name(name: str) -> str:
    """Validate a student name and return it with surrounding whitespace removed.

    Args:
        name: The name as entered; ``None`` is treated like an empty string.

    Returns:
        The stripped name.

    Raises:
        ValueError: If the name is empty, longer than 100 characters, or
            contains a digit.
    """
    # An untouched text field can deliver None; report it as "empty" instead
    # of failing with AttributeError on .strip().
    name = (name or "").strip()
    if not name:
        raise ValueError("Name cannot be empty.")
    if len(name) > 100:
        raise ValueError("Name is too long (maximum 100 characters).")
    if any(c.isdigit() for c in name):
        raise ValueError("Name cannot contain numbers.")
    return name
1619
+
1620
def validate_age(age: Union[int, float, str]) -> int:
    """Convert *age* to an int and check it against MIN_AGE..MAX_AGE.

    Args:
        age: The age as a number or numeric string.

    Returns:
        The age as an integer.

    Raises:
        ValueError: "Please enter a valid age number." when *age* cannot be
            converted, or "Age must be between ..." when it is out of range.
    """
    try:
        age_int = int(age)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers float infinity coming from a numeric widget.
        raise ValueError("Please enter a valid age number.") from None

    # The range check lives outside the try block: inside it, its ValueError
    # would be caught by the handler above and replaced by the generic
    # message, so the user would never see the allowed range.
    if not MIN_AGE <= age_int <= MAX_AGE:
        raise ValueError(f"Age must be between {MIN_AGE} and {MAX_AGE}.")
    return age_int
1628
+
1629
def validate_file(file_obj) -> None:
    """Check an uploaded file's extension and size.

    Args:
        file_obj: Upload object exposing the file's path as ``.name``.

    Raises:
        ValueError: If nothing was uploaded, the extension is not in
            ALLOWED_FILE_TYPES, or the file is larger than MAX_FILE_SIZE_MB.
    """
    if not file_obj:
        raise ValueError("Please upload a file first")

    _, extension = os.path.splitext(file_obj.name)
    if extension.lower() not in ALLOWED_FILE_TYPES:
        raise ValueError(f"Invalid file type. Allowed types: {', '.join(ALLOWED_FILE_TYPES)}")

    # Size on disk, converted from bytes to megabytes.
    size_mb = os.path.getsize(file_obj.name) / (1024 * 1024)
    if size_mb > MAX_FILE_SIZE_MB:
        raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1640
 
1641
def remove_sensitive_info(text: str) -> str:
    """Replace sensitive-looking substrings in *text* with placeholder tags.

    The patterns are applied one after another, so their order matters: a
    later pattern only sees what earlier replacements left behind.

    Args:
        text: The text to redact.

    Returns:
        The text with matches replaced by ``[REDACTED-SSN]``, ``[ID]``,
        ``[EMAIL]``, ``[IP]``, ``[ADDRESS]``, ``[NAME]`` or ``[PHONE]``.
    """
    patterns = [
        (r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED-SSN]'),
        (r'\b\d{6,9}\b', '[ID]'),
        # TLD class is [A-Za-z]; a "|" inside a character class is a literal
        # pipe, not an alternation.
        (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '[EMAIL]'),
        (r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', '[IP]'),
        # ADDRESS must run before NAME: the NAME pattern matches any two
        # adjacent capitalised words and would otherwise consume the street
        # name, leaving nothing for the address pattern to match.
        (r'\b\d{1,5} [A-Z][a-z]+ [A-Z][a-z]+, [A-Z]{2} \d{5}\b', '[ADDRESS]'),
        (r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', '[NAME]'),
        # Consumes an optional opening parenthesis and accepts space, dot or
        # hyphen separators, e.g. "(555) 123-4567" and "555-123-4567".
        (r'\(?\b\d{3}\)?[ .-]?\d{3}[ .-]\d{4}\b', '[PHONE]'),
    ]

    for pattern, replacement in patterns:
        text = re.sub(pattern, replacement, text)
    return text
1655
 
1656
+ # ========== GRADIO INTERFACE ==========
1657
  def create_enhanced_interface():
1658
  with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
1659
  session_token = gr.State(value=generate_session_token())
1660
  profile_manager.set_session(session_token.value)
1661
 
1662
  tab_completed = gr.State({
1663
+ 0: False,
1664
+ 1: False,
1665
+ 2: False,
1666
+ 3: False,
1667
+ 4: False,
1668
+ 5: False
1669
  })
1670
 
 
1671
  app.css = """
1672
  .gradio-container {
1673
  max-width: 1200px !important;
 
1780
  border-left: 4px solid #2196F3;
1781
  }
1782
 
 
1783
  .dark .tab-content {
1784
  background-color: #2d2d2d !important;
1785
  border-color: #444 !important;
 
1821
  }
1822
  """
1823
 
 
1824
  with gr.Row():
1825
  with gr.Column(scale=4):
1826
  gr.Markdown("""
 
1831
  with gr.Column(scale=1):
1832
  dark_mode = gr.Checkbox(label="Dark Mode", value=False)
1833
 
 
1834
  with gr.Row():
1835
  with gr.Column(scale=1, min_width=100):
1836
  step1 = gr.Button("πŸ“„ 1. Transcript", elem_classes="incomplete-tab")
 
1847
 
1848
  nav_message = gr.HTML(visible=False)
1849
 
 
1850
  with gr.Tabs(visible=True) as tabs:
 
1851
  with gr.Tab("Transcript", id=0):
1852
  with gr.Row():
1853
  with gr.Column(scale=1):
 
1888
 
1889
  def process_and_visualize(file_obj, tab_status):
1890
  try:
1891
+ parsed_data = transcript_parser.parse_transcript(file_obj.name)
 
1892
 
 
1893
  gpa_analysis = academic_analyzer.analyze_gpa(parsed_data)
1894
  grad_status = academic_analyzer.analyze_graduation_status(parsed_data)
1895
  college_recs = academic_analyzer.generate_college_recommendations(parsed_data)
1896
 
 
1897
  results = [
1898
  f"## πŸ“Š GPA Analysis",
1899
  f"**Rating:** {gpa_analysis['rating']}",
 
1923
  results.append("\n**Improvement Tips:**")
1924
  results.extend([f"- {tip}" for tip in gpa_analysis['improvement_tips']])
1925
 
 
1926
  viz_updates = [
1927
  gr.update(visible=data_visualizer.create_gpa_visualization(parsed_data) is not None),
1928
  gr.update(visible=data_visualizer.create_requirements_visualization(parsed_data) is not None),
 
1930
  gr.update(visible=data_visualizer.create_course_rigor_visualization(parsed_data) is not None)
1931
  ]
1932
 
 
1933
  tab_status[0] = True
1934
 
1935
  return "\n".join(results), parsed_data, *viz_updates, tab_status
 
1950
  outputs=step2
1951
  )
1952
 
 
1953
  with gr.Tab("Learning Style Quiz", id=1):
1954
  with gr.Column():
1955
  gr.Markdown("### πŸ“ Step 2: Discover Your Learning Style")
 
2015
  outputs=progress
2016
  )
2017
 
 
2018
  with gr.Tab("Personal Profile", id=2):
2019
  with gr.Row():
2020
  with gr.Column(scale=1):
 
2060
  outputs=[tab_completed, step3, step4, save_confirmation]
2061
  )
2062
 
 
2063
  with gr.Tab("Save Profile", id=3):
2064
  with gr.Row():
2065
  with gr.Column(scale=1):
 
2159
  ]
2160
  )
2161
 
 
2162
  with gr.Tab("AI Assistant", id=4):
2163
  gr.Markdown("## πŸ’¬ Your Personalized Learning Assistant")
2164
  gr.Markdown("Ask me anything about studying, your courses, grades, or learning strategies.")
2165
 
 
2166
  chatbot = gr.Chatbot(height=500)
2167
  msg = gr.Textbox(label="Your Message")
2168
  clear = gr.Button("Clear")
 
2175
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
2176
  clear.click(lambda: None, None, chatbot, queue=False)
2177
 
 
2178
  with gr.Tab("Goals & Planning", id=5):
2179
  with gr.Row():
2180
  with gr.Column(scale=1):
 
2203
  calendar_output = gr.HTML()
2204
  calendar_viz = gr.Plot(label="Calendar Visualization", visible=False)
2205
 
 
2206
  goal_type.change(
2207
  fn=lambda gt: gr.update(visible=gt in ["GPA Improvement", "Test Score"]),
2208
  inputs=goal_type,
 
2255
 
2256
  calendar = study_calendar.generate_study_calendar(profile, start_date.isoformat())
2257
 
 
2258
  calendar_html = []
2259
  current_date = datetime.date.fromisoformat(calendar['start_date'])
2260
  end_date = datetime.date.fromisoformat(calendar['end_date'])
 
2298
  gr.update(visible=study_calendar.create_calendar_visualization(calendar) is not None)
2299
  )
2300
 
 
2301
  add_goal_btn.click(
2302
  fn=lambda gt, desc, date, val: (
2303
  goal_tracker.add_goal(name.value, gt, desc, date, val),
 
2315
  outputs=[goals_output, goal_viz]
2316
  )
2317
 
 
2318
  generate_calendar_btn.click(
2319
  fn=lambda date: update_calendar_display(name.value, date),
2320
  inputs=calendar_start_date,
2321
  outputs=[calendar_output, calendar_viz]
2322
  )
2323
 
 
2324
  def navigate_to_tab(tab_index: int, tab_completed_status: dict):
 
2325
  for i in range(tab_index):
2326
  if not tab_completed_status.get(i, False):
2327
  messages = [
 
2332
  "Please complete the previous steps first"
2333
  ]
2334
  return (
2335
+ gr.Tabs(selected=i),
2336
  gr.update(
2337
  value=f"<div class='error-message'>⛔ {messages[i]}</div>",
2338
  visible=True
 
2372
  outputs=[tabs, nav_message]
2373
  )
2374
 
 
2375
  def toggle_dark_mode(dark):
2376
  return gr.themes.Soft(primary_hue="blue", secondary_hue="gray") if not dark else gr.themes.Soft(primary_hue="blue", secondary_hue="gray", neutral_hue="slate")
2377
 
 
2381
  outputs=None
2382
  )
2383
 
 
2384
  app.load(fn=lambda: model_loader.load_model(), outputs=[])
2385
 
2386
  return app