Spaces:

Dannyar608
/

Final_project

Runtime error

App Files Community

Dannyar608 committed on May 25

Commit

5261899

verified ·

1 Parent(s): 9c9be5d

Update app.py

Browse files

Files changed (1) hide show

app.py +172 -953

app.py CHANGED Viewed

@@ -38,17 +38,17 @@ import matplotlib.pyplot as plt
 # Enhanced Configuration
 PROFILES_DIR = "student_profiles"
 ALLOWED_FILE_TYPES = [".pdf", ".png", ".jpg", ".jpeg"]
-MAX_FILE_SIZE_MB = 10  # Increased from 5MB
 MIN_AGE = 5
 MAX_AGE = 120
 SESSION_TOKEN_LENGTH = 32
 HF_TOKEN = os.getenv("HF_TOKEN")
 ENCRYPTION_KEY = os.getenv("ENCRYPTION_KEY", Fernet.generate_key().decode())
-SESSION_TIMEOUT = 3600 * 3  # 3 hour session timeout
 MAX_CONTEXT_HISTORY = 10
 MAX_PROFILE_LOAD_ATTEMPTS = 3
-# Initialize logging with enhanced configuration
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
@@ -59,10 +59,10 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
-# Model configuration - Using more capable model
-MODEL_NAME = "deepseek-ai/deepseek-llm-7b"  # Upgraded from 1.3b to 7b
-# Initialize Hugging Face API with retry logic
 if HF_TOKEN:
     hf_api = None
     for attempt in range(3):
@@ -73,7 +73,7 @@ if HF_TOKEN:
             break
         except Exception as e:
             logger.error(f"Attempt {attempt + 1} failed to initialize Hugging Face API: {str(e)}")
-            time.sleep(2 ** attempt)  # Exponential backoff
 # ========== LEARNING STYLE QUIZ ==========
 class LearningStyleQuiz:
@@ -119,7 +119,6 @@ class LearningStyleQuiz:
             'kinesthetic': 0
         }
-        # Map each answer to a learning style
         for answer in answers:
             if answer.startswith("See") or answer.startswith("Draw") or answer.startswith("Watch") or "diagram" in answer.lower():
                 style_counts['visual'] += 1
@@ -133,7 +132,6 @@ class LearningStyleQuiz:
         primary_style = max(style_counts, key=style_counts.get)
         secondary_styles = sorted(style_counts.items(), key=lambda x: x[1], reverse=True)[1:3]
-        # Generate results
         result = [
             "## 🎯 Your Learning Style Results",
             f"Your primary learning style is **{primary_style.capitalize()}**",
@@ -183,7 +181,7 @@ class LearningStyleQuiz:
 # Initialize learning style quiz
 learning_style_quiz = LearningStyleQuiz()
-# ========== ENHANCED MODEL LOADER ==========
 class ModelLoader:
     def __init__(self):
         self.model = None
@@ -196,7 +194,6 @@ class ModelLoader:
         self.max_retries = 3
     def load_model(self, progress: gr.Progress = None) -> Tuple[Optional[AutoModelForCausalLM], Optional[AutoTokenizer]]:
-        """Enhanced lazy load the model with progress feedback and retry logic"""
         if self.loaded:
             return self.model, self.tokenizer
@@ -212,7 +209,6 @@ class ModelLoader:
             if progress:
                 progress(0.1, desc="Initializing model environment...")
-            # Clear GPU cache more aggressively
             if self.device == "cuda":
                 torch.cuda.empty_cache()
                 torch.cuda.reset_peak_memory_stats()
@@ -220,7 +216,6 @@ class ModelLoader:
             if progress:
                 progress(0.2, desc="Loading tokenizer...")
-            # Tokenizer with more error handling
             tokenizer = None
             for attempt in range(3):
                 try:
@@ -239,7 +234,6 @@ class ModelLoader:
             if progress:
                 progress(0.5, desc="Loading model (this may take a few minutes)...")
-            # Model configuration with fallbacks
             model_kwargs = {
                 "trust_remote_code": True,
                 "torch_dtype": torch.float16 if self.device == "cuda" else torch.float32,
@@ -248,7 +242,6 @@ class ModelLoader:
                 "offload_folder": "offload"
             }
-            # Add max_memory configuration if multiple GPUs available
             if torch.cuda.device_count() > 1:
                 model_kwargs["max_memory"] = {i: "20GiB" for i in range(torch.cuda.device_count())}
@@ -275,7 +268,6 @@ class ModelLoader:
                     logger.warning(f"Model loading attempt {attempt + 1} failed: {str(e)}")
                     time.sleep(2 ** attempt)
-            # Test inference
             if progress:
                 progress(0.8, desc="Verifying model...")
             test_input = tokenizer("Test", return_tensors="pt").to(self.device)
@@ -307,580 +299,89 @@ model_loader = ModelLoader()
 def get_model_and_tokenizer():
     return model_loader.load_model()
-# ========== ENHANCED UTILITY FUNCTIONS ==========
-class DataEncryptor:
-    def __init__(self, key: str):
-        self.cipher = Fernet(key.encode())
-    def encrypt(self, data: str) -> str:
-        return self.cipher.encrypt(data.encode()).decode()
-    def decrypt(self, encrypted_data: str) -> str:
-        return self.cipher.decrypt(encrypted_data.encode()).decode()
-encryptor = DataEncryptor(ENCRYPTION_KEY)
-def generate_session_token() -> str:
-    alphabet = string.ascii_letters + string.digits
-    return ''.join(secrets.choice(alphabet) for _ in range(SESSION_TOKEN_LENGTH))
-def sanitize_input(text: str) -> str:
-    if not text:
-        return ""
-    text = html.escape(text.strip())
-    text = re.sub(r'<[^>]*>', '', text)
-    text = re.sub(r'[^\w\s\-.,!?@#\$%^&*()+=]', '', text)
-    return text
-def validate_name(name: str) -> str:
-    name = name.strip()
-    if not name:
-        raise ValueError("Name cannot be empty.")
-    if len(name) > 100:
-        raise ValueError("Name is too long (maximum 100 characters).")
-    if any(c.isdigit() for c in name):
-        raise ValueError("Name cannot contain numbers.")
-    return name
-def validate_age(age: Union[int, float, str]) -> int:
-    try:
-        age_int = int(age)
-        if not MIN_AGE <= age_int <= MAX_AGE:
-            raise ValueError(f"Age must be between {MIN_AGE} and {MAX_AGE}.")
-        return age_int
-    except (ValueError, TypeError):
-        raise ValueError("Please enter a valid age number.")
-def validate_file(file_obj) -> None:
-    if not file_obj:
-        raise ValueError("Please upload a file first")
-    file_ext = os.path.splitext(file_obj.name)[1].lower()
-    if file_ext not in ALLOWED_FILE_TYPES:
-        raise ValueError(f"Invalid file type. Allowed types: {', '.join(ALLOWED_FILE_TYPES)}")
-    file_size = os.path.getsize(file_obj.name) / (1024 * 1024)
-    if file_size > MAX_FILE_SIZE_MB:
-        raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
-def remove_sensitive_info(text: str) -> str:
-    """Enhanced PII removal with more patterns"""
-    patterns = [
-        (r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED-SSN]'),
-        (r'\b\d{6,9}\b', '[ID]'),
-        (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', '[EMAIL]'),
-        (r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', '[IP]'),
-        (r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', '[NAME]'),  # Simple name pattern
-        (r'\b\d{3}\) \d{3}-\d{4}\b', '[PHONE]'),
-        (r'\b\d{1,5} [A-Z][a-z]+ [A-Z][a-z]+, [A-Z]{2} \d{5}\b', '[ADDRESS]')
-    ]
-    for pattern, replacement in patterns:
-        text = re.sub(pattern, replacement, text)
-    return text
-# ========== ENHANCED PDF PARSING ==========
-class EnhancedTranscriptParser:
     def __init__(self):
-        self.common_school_patterns = {
-            'miami_dade': r'(MIAMI-DADE|DADE COUNTY|MDCPS)',
-            'broward': r'(BROWARD COUNTY|BCPS)',
-            'florida': r'(FLORIDA|FDOE|FL DOE)'
-        }
-        self.transcript_templates = {
-            'miami_dade': self._parse_miami_dade_transcript,
-            'broward': self._parse_broward_transcript,
-            'florida': self._parse_florida_standard_transcript,
-            'default': self._parse_generic_transcript
-        }
-    def detect_transcript_type(self, text: str) -> str:
-        """Detect the transcript format based on patterns"""
-        text = text.upper()
-        for template, pattern in self.common_school_patterns.items():
-            if re.search(pattern, text):
-                return template
-        return 'default'
-    def parse_transcript(self, file_path: str, file_ext: str) -> Dict:
-        """Enhanced parsing with format detection and fallbacks"""
-        try:
-            # First extract text with appropriate method
-            text = self.extract_text_from_file(file_path, file_ext)
-            if not text.strip():
-                raise ValueError("No text could be extracted from file")
-            # Detect transcript type
-            transcript_type = self.detect_transcript_type(text)
-            logger.info(f"Detected transcript type: {transcript_type}")
-            # Try specialized parser first
-            parser_func = self.transcript_templates.get(transcript_type, self._parse_generic_transcript)
-            parsed_data = parser_func(text)
-            if not parsed_data:
-                logger.warning(f"Specialized parser failed, trying generic parser")
-                parsed_data = self._parse_generic_transcript(text)
-            if not parsed_data:
-                raise ValueError("No data could be parsed from transcript")
-            # Validate and enhance parsed data
-            self.validate_parsed_data(parsed_data)
-            self.enhance_parsed_data(parsed_data)
-            return parsed_data
-        except Exception as e:
-            logger.error(f"Error parsing transcript: {str(e)}")
-            raise ValueError(f"Couldn't parse transcript content. Error: {str(e)}")
-    def extract_text_from_file(self, file_path: str, file_ext: str) -> str:
-        """Enhanced text extraction with multiple fallbacks"""
-        text = ""
-        try:
-            if file_ext == '.pdf':
-                # Try pdfplumber first for better table handling
-                try:
-                    with pdfplumber.open(file_path) as pdf:
-                        for page in pdf.pages:
-                            # Try to extract tables first
-                            tables = page.extract_tables({
-                                "vertical_strategy": "text",
-                                "horizontal_strategy": "text",
-                                "intersection_y_tolerance": 10,
-                                "join_tolerance": 20
-                            })
-                            if tables:
-                                for table in tables:
-                                    for row in table:
-                                        text += " | ".join(str(cell).strip() for cell in row if cell) + "\n"
-                            # Fall back to text extraction if tables are empty
-                            page_text = page.extract_text()
-                            if page_text:
-                                text += page_text + "\n"
-                    if not text.strip():
-                        raise ValueError("PDFPlumber returned empty text")
-                except Exception as e:
-                    logger.warning(f"PDFPlumber failed: {str(e)}. Trying PyMuPDF...")
-                    doc = fitz.open(file_path)
-                    for page in doc:
-                        text += page.get_text("text", flags=fitz.TEXT_PRESERVE_IMAGES) + '\n'
-            elif file_ext in ['.png', '.jpg', '.jpeg']:
-                text = self.extract_text_with_enhanced_ocr(file_path)
-            text = self.clean_extracted_text(text)
-            if not text.strip():
-                raise ValueError("The file appears to be empty or contains no readable text.")
-            return text
-        except Exception as e:
-            logger.error(f"Text extraction error: {str(e)}")
-            raise ValueError(f"Failed to extract text: {str(e)}")
-    def extract_text_with_enhanced_ocr(self, file_path: str) -> str:
-        """Enhanced OCR with preprocessing"""
-        try:
-            image = Image.open(file_path)
-            # Preprocessing for better OCR
-            image = image.convert('L')  # Grayscale
-            image = image.point(lambda x: 0 if x < 140 else 255, '1')  # Thresholding
-            # Custom config for academic documents
-            custom_config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-.,:()%$@ '
-            # Try with different page segmentation modes
-            for psm in [6, 11, 4]:  # Try different modes
-                text = pytesseract.image_to_string(image, config=f"{custom_config} --psm {psm}")
-                if len(text.strip()) > 50:  # If we got reasonable text
-                    break
-            return text
-        except Exception as e:
-            raise ValueError(f"OCR processing failed: {str(e)}")
-    def clean_extracted_text(self, text: str) -> str:
-        """Enhanced cleaning for academic transcripts"""
-        # Normalize whitespace and case
-        text = re.sub(r'\s+', ' ', text).strip()
-        # Fix common OCR errors in academic contexts
-        replacements = {
-            'GradeLv1': 'GradeLvl',
-            'CrsNu m': 'CrsNum',
-            'YOG': 'Year of Graduation',
-            'Comm Serv': 'Community Service',
-            r'\bA\s*-\s*': 'A-',  # Fix requirement codes
-            r'\bB\s*-\s*': 'B-',
-            r'\bC\s*-\s*': 'C-',
-            r'\bD\s*-\s*': 'D-',
-            r'\bE\s*-\s*': 'E-',
-            r'\bF\s*-\s*': 'F-',
-            r'\bG\s*-\s*': 'G-',
-            r'\bZ\s*-\s*': 'Z-',
-            'lnProgress': 'inProgress',
-            'lP': 'IP',
-            'AP\s': 'AP ',
-            'DE\s': 'DE ',
-            'Honors\s': 'Honors ',
-            'lB': 'IB'
-        }
-        for pattern, replacement in replacements.items():
-            text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
-        # Fix course codes with spaces
-        text = re.sub(r'(\b[A-Z]{2,4})\s(\d{3}[A-Z]?\b)', r'\1\2', text)
-        return text
-    def validate_parsed_data(self, parsed_data: Dict) -> bool:
-        """Enhanced validation with more fields"""
-        required_fields = [
-            ('student_info', 'name'),
-            ('student_info', 'id'),
-            ('requirements',),  # At least some requirements
-            ('course_history',)  # At least some courses
-        ]
-        for path in required_fields:
-            current = parsed_data
-            for key in path:
-                if key not in current:
-                    raise ValueError(f"Missing critical field: {'.'.join(path)}")
-                current = current[key]
-        return True
-    def enhance_parsed_data(self, parsed_data: Dict) -> Dict:
-        """Add derived fields and calculations"""
-        # Calculate total credits if not present
-        if 'total_credits' not in parsed_data.get('student_info', {}):
-            try:
-                total_credits = sum(
-                    float(course.get('credits', 0))
-                    for course in parsed_data.get('course_history', [])
-                    if course and str(course.get('credits', '0')).replace('.', '').isdigit()
-                )
-                parsed_data['student_info']['total_credits'] = round(total_credits, 2)
-            except:
-                pass
-        # Calculate GPA if not present
-        if 'weighted_gpa' not in parsed_data.get('student_info', {}):
-            try:
-                grades = []
-                grade_points = {
-                    'A': 4.0, 'A-': 3.7, 'B+': 3.3, 'B': 3.0, 'B-': 2.7,
-                    'C+': 2.3, 'C': 2.0, 'C-': 1.7, 'D+': 1.3, 'D': 1.0, 'F': 0.0
-                }
-                for course in parsed_data.get('course_history', []):
-                    grade = course.get('grade_earned', '').upper()
-                    if grade in grade_points:
-                        grades.append(grade_points[grade])
-                if grades:
-                    unweighted_gpa = sum(grades) / len(grades)
-                    parsed_data['student_info']['unweighted_gpa'] = round(unweighted_gpa, 2)
-                    # Simple weighted GPA calculation (AP/IB/DE courses get +1)
-                    weighted_grades = []
-                    for course in parsed_data.get('course_history', []):
-                        grade = course.get('grade_earned', '').upper()
-                        if grade in grade_points:
-                            weight = 1.0 if any(x in course.get('course_name', '').upper()
-                                     for x in ['AP', 'IB', 'DE', 'HONORS']) else 0.0
-                            weighted_grades.append(grade_points[grade] + weight)
-                    if weighted_grades:
-                        parsed_data['student_info']['weighted_gpa'] = round(sum(weighted_grades) / len(weighted_grades), 2)
-            except:
-                pass
-        return parsed_data
-    def _parse_miami_dade_transcript(self, text: str) -> Optional[Dict]:
-        """Enhanced Miami-Dade parser with better table handling"""
-        try:
-            parsed_data = {
-                'student_info': {},
-                'requirements': {},
-                'course_history': [],
-                'assessments': {}
-            }
-            # Extract student info with more robust pattern
-            student_info_match = re.search(
-                r"(\d{7})\s*-\s*(.*?)\s*\n.*?Current Grade:\s*(\d+).*?YOG\s*(\d{4})",
-                text,
-                re.DOTALL | re.IGNORECASE
-            )
-            if student_info_match:
-                parsed_data['student_info'] = {
-                    'id': student_info_match.group(1),
-                    'name': student_info_match.group(2).strip(),
-                    'grade': student_info_match.group(3),
-                    'year_of_graduation': student_info_match.group(4),
-                    'district': 'Miami-Dade'
-                }
-            # Extract GPA information with more flexible patterns
-            gpa_patterns = [
-                r"(?:Un.?weighted|Weighted)\s*GPA\s*([\d.]+)",
-                r"GPA\s*\(.*?\)\s*:\s*([\d.]+)",
-                r"Grade\s*Point\s*Average\s*:\s*([\d.]+)"
-            ]
-            gpa_values = []
-            for pattern in gpa_patterns:
-                gpa_values.extend(re.findall(pattern, text, re.IGNORECASE))
-                if len(gpa_values) >= 2:
-                    break
-            if len(gpa_values) >= 1:
-                parsed_data['student_info']['unweighted_gpa'] = float(gpa_values[0])
-            if len(gpa_values) >= 2:
-                parsed_data['student_info']['weighted_gpa'] = float(gpa_values[1])
-            # Extract community service info
-            service_hours_match = re.search(r"Comm\s*Serv\s*Hours\s*(\d+)", text, re.IGNORECASE)
-            if service_hours_match:
-                parsed_data['student_info']['community_service_hours'] = int(service_hours_match.group(1))
-            service_date_match = re.search(r"Comm\s*Serv\s*Date\s*(\d{2}/\d{2}/\d{4})", text, re.IGNORECASE)
-            if service_date_match:
-                parsed_data['student_info']['community_service_date'] = service_date_match.group(1)
-            # Extract credits info
-            credits_match = re.search(r"Total\s*Credits\s*Earned\s*([\d.]+)", text, re.IGNORECASE)
-            if credits_match:
-                parsed_data['student_info']['total_credits'] = float(credits_match.group(1))
-            # Extract virtual grade
-            virtual_grade_match = re.search(r"Virtual\s*Grade\s*([A-Z])", text, re.IGNORECASE)
-            if virtual_grade_match:
-                parsed_data['student_info']['virtual_grade'] = virtual_grade_match.group(1)
-            # Enhanced requirements section parsing
-            req_section = re.search(
-                r"(?:Graduation\s*Requirements|Requirements\s*Summary).*?(Code\s*Description.*?)(?:\n\s*\n|$)",
-                text,
-                re.DOTALL | re.IGNORECASE
-            )
-            if req_section:
-                req_lines = [line.strip() for line in req_section.group(1).split('\n') if line.strip()]
-                for line in req_lines:
-                    if '|' in line:  # Table format
-                        parts = [part.strip() for part in line.split('|') if part.strip()]
-                        if len(parts) >= 5:  # More lenient check for number of columns
-                            try:
-                                code = parts[0] if len(parts) > 0 else ""
-                                description = parts[1] if len(parts) > 1 else ""
-                                required = float(parts[2]) if len(parts) > 2 and parts[2].replace('.','').isdigit() else 0.0
-                                waived = float(parts[3]) if len(parts) > 3 and parts[3].replace('.','').isdigit() else 0.0
-                                completed = float(parts[4]) if len(parts) > 4 and parts[4].replace('.','').isdigit() else 0.0
-                                status = parts[5] if len(parts) > 5 else ""
-                                # Extract percentage if available
-                                percent = 0.0
-                                if status:
-                                    percent_match = re.search(r"(\d+)%", status)
-                                    if percent_match:
-                                        percent = float(percent_match.group(1))
-                                parsed_data['requirements'][code] = {
-                                    "description": description,
-                                    "required": required,
-                                    "waived": waived,
-                                    "completed": completed,
-                                    "percent_complete": percent,
-                                    "status": status
-                                }
-                            except (IndexError, ValueError) as e:
-                                logger.warning(f"Skipping malformed requirement line: {line}. Error: {str(e)}")
-                                continue
-            # Enhanced course history parsing
-            course_section = re.search(
-                r"(?:Course\s*History|Academic\s*Record).*?(Requirement.*?School Year.*?GradeLv1.*?CrsNum.*?Description.*?Term.*?DstNumber.*?FG.*?Incl.*?Credits.*?)(?:\n\s*\n|$)",
-                text,
-                re.DOTALL | re.IGNORECASE
-            )
-            if course_section:
-                course_lines = [
-                    line.strip() for line in course_section.group(1).split('\n')
-                    if line.strip() and '|' in line
-                ]
-                for line in course_lines:
-                    parts = [part.strip() for part in line.split('|') if part.strip()]
-                    try:
-                        course = {
-                            'requirement': parts[0] if len(parts) > 0 else "",
-                            'school_year': parts[1] if len(parts) > 1 else "",
-                            'grade_level': parts[2] if len(parts) > 2 else "",
-                            'course_code': parts[3] if len(parts) > 3 else "",
-                            'description': parts[4] if len(parts) > 4 else "",
-                            'term': parts[5] if len(parts) > 5 else "",
-                            'district_number': parts[6] if len(parts) > 6 else "",
-                            'fg': parts[7] if len(parts) > 7 else "",
-                            'included': parts[8] if len(parts) > 8 else "",
-                            'credits': parts[9] if len(parts) > 9 else "0",
-                            'status': 'Completed' if parts[9] and parts[9] != 'inProgress' else 'In Progress'
-                        }
-                        # Handle credits conversion
-                        if "inprogress" in course['credits'].lower() or not course['credits']:
-                            course['credits'] = "0"
-                        elif not course['credits'].replace('.','').isdigit():
-                            course['credits'] = "0"
-                        parsed_data['course_history'].append(course)
-                    except (IndexError, ValueError) as e:
-                        logger.warning(f"Skipping malformed course line: {line}. Error: {str(e)}")
-                        continue
-            return parsed_data
-        except Exception as e:
-            logger.warning(f"Miami-Dade transcript parsing failed: {str(e)}")
-            return None
-    def _parse_broward_transcript(self, text: str) -> Optional[Dict]:
-        """Parser for Broward County transcripts"""
-        try:
             parsed_data = {
-                'student_info': {},
-                'requirements': {},
-                'course_history': [],
-                'assessments': {}
             }
-            # Broward-specific patterns
-            student_info_match = re.search(
-                r"Student:\s*(\d+)\s*-\s*(.*?)\s*Grade:\s*(\d+)",
-                text,
-                re.IGNORECASE
-            )
-            if student_info_match:
-                parsed_data['student_info'] = {
-                    'id': student_info_match.group(1),
-                    'name': student_info_match.group(2).strip(),
-                    'grade': student_info_match.group(3),
-                    'district': 'Broward'
-                }
-            # Add Broward-specific parsing logic here...
             return parsed_data
-        except Exception as e:
-            logger.warning(f"Broward transcript parsing failed: {str(e)}")
-            return None
-    def _parse_florida_standard_transcript(self, text: str) -> Optional[Dict]:
-        """Parser for Florida standard transcripts"""
-        try:
-            parsed_data = {
-                'student_info': {},
-                'requirements': {},
-                'course_history': [],
-                'assessments': {}
-            }
-            # Florida standard patterns
-            student_info_match = re.search(
-                r"Florida\s*Student\s*Transcript.*?Name:\s*(.*?)\s*ID:\s*(\d+)",
-                text,
-                re.IGNORECASE | re.DOTALL
-            )
-            if student_info_match:
-                parsed_data['student_info'] = {
-                    'name': student_info_match.group(1).strip(),
-                    'id': student_info_match.group(2),
-                    'district': 'Florida'
-                }
-            # Add Florida standard parsing logic here...
-            return parsed_data
-        except Exception as e:
-            logger.warning(f"Florida standard transcript parsing failed: {str(e)}")
-            return None
-    def _parse_generic_transcript(self, text: str) -> Optional[Dict]:
-        """Fallback parser for generic transcripts"""
-        try:
-            parsed_data = {
-                'student_info': {},
-                'requirements': {},
-                'course_history': [],
-                'assessments': {}
             }
-            # Try to extract basic student info
-            name_match = re.search(r"(?:Student|Name):\s*(.*?)\s*(?:\n|ID|$)", text, re.IGNORECASE)
-            if name_match:
-                parsed_data['student_info']['name'] = name_match.group(1).strip()
-            id_match = re.search(r"(?:ID|Student\s*Number):\s*(\d+)", text, re.IGNORECASE)
-            if id_match:
-                parsed_data['student_info']['id'] = id_match.group(1)
-            # Try to extract courses
-            course_patterns = [
-                r"([A-Z]{2,4}\d{3}[A-Z]?)\s+(.*?)\s+([A-F][+-]?)\s+(\d+\.?\d*)",  # CODE DESC GRADE CREDITS
-                r"(\d{4}-\d{4})\s+([A-Z]{2,4}\d{3}[A-Z]?)\s+(.*?)\s+([A-F][+-]?)\s+(\d+\.?\d*)",  # YEAR CODE DESC GRADE CREDITS
-                r"(.*?)\s+([A-F][+-]?)\s+(\d+\.?\d*)"  # DESC GRADE CREDITS
-            ]
-            for pattern in course_patterns:
-                courses = re.findall(pattern, text)
-                if courses:
-                    for course in courses:
-                        if len(course) == 4:
-                            parsed_data['course_history'].append({
-                                'course_code': course[0],
-                                'description': course[1],
-                                'grade': course[2],
-                                'credits': course[3]
-                            })
-                        elif len(course) == 5:
-                            parsed_data['course_history'].append({
-                                'school_year': course[0],
-                                'course_code': course[1],
-                                'description': course[2],
-                                'grade': course[3],
-                                'credits': course[4]
-                            })
-                        elif len(course) == 3:
-                            parsed_data['course_history'].append({
-                                'description': course[0],
-                                'grade': course[1],
-                                'credits': course[2]
-                            })
-                    break
-            return parsed_data if parsed_data['course_history'] else None
-        except Exception as e:
-            logger.warning(f"Generic transcript parsing failed: {str(e)}")
-            return None
-# Initialize enhanced parser
-transcript_parser = EnhancedTranscriptParser()
-# ========== ENHANCED ANALYSIS FUNCTIONS ==========
 class AcademicAnalyzer:
     def __init__(self):
         self.gpa_scale = {
@@ -896,7 +397,6 @@ class AcademicAnalyzer:
         }
     def analyze_gpa(self, parsed_data: Dict) -> Dict:
-        """Enhanced GPA analysis with more detailed feedback"""
         analysis = {
             'rating': '',
             'description': '',
@@ -954,7 +454,6 @@ class AcademicAnalyzer:
                     "Focus on fundamental study skills"
                 ]
-            # Add comparison between weighted and unweighted
             if weighted_gpa > 0 and unweighted_gpa > 0:
                 diff = weighted_gpa - unweighted_gpa
                 if diff > 0.5:
@@ -974,7 +473,6 @@ class AcademicAnalyzer:
             }
     def analyze_graduation_status(self, parsed_data: Dict) -> Dict:
-        """Enhanced graduation analysis with requirement breakdown"""
         analysis = {
             'status': '',
             'completion_percentage': 0,
@@ -998,7 +496,6 @@ class AcademicAnalyzer:
             analysis['completion_percentage'] = (total_completed / total_required) * 100 if total_required > 0 else 0
-            # Identify missing requirements
             analysis['missing_requirements'] = [
                 {
                     'code': code,
@@ -1010,7 +507,6 @@ class AcademicAnalyzer:
                 if req and float(req.get('completed', 0)) < float(req.get('required', 0))
             ]
-            # Determine status message
             current_grade = parsed_data.get('student_info', {}).get('grade', '')
             grad_year = parsed_data.get('student_info', {}).get('year_of_graduation', '')
@@ -1030,7 +526,6 @@ class AcademicAnalyzer:
                 analysis['status'] = f"❌ You've only completed {analysis['completion_percentage']:.1f}% of requirements. Immediate action needed."
                 analysis['on_track'] = False
-            # Add timeline projection if possible
             if current_grade and grad_year:
                 remaining_credits = total_required - total_completed
                 years_remaining = int(grad_year) - datetime.datetime.now().year - int(current_grade)
@@ -1053,7 +548,6 @@ class AcademicAnalyzer:
             }
     def analyze_course_rigor(self, parsed_data: Dict) -> Dict:
-        """Analyze the difficulty level of courses taken"""
         analysis = {
             'advanced_courses': 0,
             'honors_courses': 0,
@@ -1127,7 +621,6 @@ class AcademicAnalyzer:
             }
     def generate_college_recommendations(self, parsed_data: Dict) -> Dict:
-        """Enhanced college recommendations based on full profile"""
         recommendations = {
             'reach': [],
             'target': [],
@@ -1137,12 +630,10 @@ class AcademicAnalyzer:
         }
         try:
-            # Get key metrics
             weighted_gpa = float(parsed_data.get('student_info', {}).get('weighted_gpa', 0))
             rigor_analysis = self.analyze_course_rigor(parsed_data)
             service_hours = int(parsed_data.get('student_info', {}).get('community_service_hours', 0))
-            # Determine college tiers
             if weighted_gpa >= 4.3 and rigor_analysis['advanced_courses'] >= 8 and service_hours >= 100:
                 recommendations['reach'].extend([
                     "Ivy League: Harvard, Yale, Princeton, Columbia, etc.",
@@ -1190,7 +681,6 @@ class AcademicAnalyzer:
                     "Technical Schools"
                 ])
-            # Scholarship recommendations
             if weighted_gpa >= 4.0:
                 recommendations['scholarships'].extend([
                     "National Merit Scholarship",
@@ -1210,7 +700,6 @@ class AcademicAnalyzer:
                     "First-Generation Student Programs"
                 ])
-            # Improvement areas
             if weighted_gpa < 3.5:
                 recommendations['improvement_areas'].append("Improve GPA through focused study and tutoring")
             if rigor_analysis['advanced_courses'] < 4:
@@ -1229,7 +718,6 @@ class AcademicAnalyzer:
             }
     def generate_study_plan(self, parsed_data: Dict, learning_style: str) -> Dict:
-        """Generate personalized study plan based on learning style and courses"""
         plan = {
             'weekly_schedule': {},
             'study_strategies': [],
@@ -1238,19 +726,16 @@ class AcademicAnalyzer:
         }
         try:
-            # Get current courses
             current_courses = [
                 course for course in parsed_data.get('course_history', [])
                 if course.get('status', '').lower() == 'in progress'
             ]
-            # Generate weekly schedule template
             days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
             for day in days:
                 plan['weekly_schedule'][day] = []
-            # Add study blocks based on learning style
-            study_blocks = 2  # Default
             if learning_style.lower() == 'visual':
                 study_blocks = 3
                 plan['study_strategies'].extend([
@@ -1280,9 +765,8 @@ class AcademicAnalyzer:
                     "Use hands-on activities when possible"
                 ])
-            # Distribute study blocks
             for i, course in enumerate(current_courses):
-                day_index = i % 5  # Monday-Friday
                 day = days[day_index]
                 plan['weekly_schedule'][day].append({
                     'course': course.get('description', 'Course'),
@@ -1294,14 +778,12 @@ class AcademicAnalyzer:
                     ]
                 })
-            # Add time management tips
             plan['time_management_tips'].extend([
                 "Use the Pomodoro technique (25 min study, 5 min break)",
                 "Prioritize assignments by due date and importance",
                 "Schedule regular review sessions"
             ])
-            # Add resource recommendations
             plan['resource_recommendations'].extend([
                 "Khan Academy for math and science",
                 "Quizlet for flashcards",
@@ -1320,7 +802,7 @@ class AcademicAnalyzer:
 # Initialize academic analyzer
 academic_analyzer = AcademicAnalyzer()
-# ========== ENHANCED VISUALIZATION FUNCTIONS ==========
 class DataVisualizer:
     def __init__(self):
         self.color_palette = {
@@ -1335,7 +817,6 @@ class DataVisualizer:
         }
     def create_gpa_visualization(self, parsed_data: Dict):
-        """Enhanced GPA visualization with more details"""
         try:
             gpa_data = {
                 "Type": ["Weighted GPA", "Unweighted GPA"],
@@ -1362,7 +843,6 @@ class DataVisualizer:
                 hover_data={"Type": True, "Value": ":.2f"}
             )
-            # Add reference lines and annotations
             fig.add_hline(y=4.0, line_dash="dot", line_color="green", annotation_text="Excellent", annotation_position="top left")
             fig.add_hline(y=3.0, line_dash="dot", line_color="orange", annotation_text="Good", annotation_position="top left")
             fig.add_hline(y=2.0, line_dash="dot", line_color="red", annotation_text="Minimum", annotation_position="top left")
@@ -1389,7 +869,6 @@ class DataVisualizer:
             return None
     def create_requirements_visualization(self, parsed_data: Dict):
-        """Enhanced requirements visualization with interactive elements"""
         try:
             req_data = []
             for code, req in parsed_data.get('requirements', {}).items():
@@ -1448,21 +927,20 @@ class DataVisualizer:
             return None
     def create_credits_distribution_visualization(self, parsed_data: Dict):
-        """Enhanced credits distribution visualization"""
         try:
             core_credits = sum(
                 req['completed'] for req in parsed_data.get('requirements', {}).values()
-                if req and req.get('code', '').split('-')[0] in ['A', 'B', 'C', 'D']  # English, Math, Science, Social Studies
             )
             elective_credits = sum(
                 req['completed'] for req in parsed_data.get('requirements', {}).values()
-                if req and req.get('code', '').split('-')[0] in ['G', 'H']  # Electives
             )
             other_credits = sum(
                 req['completed'] for req in parsed_data.get('requirements', {}).values()
-                if req and req.get('code', '').split('-')[0] in ['E', 'F']  # Arts, PE
             )
             credit_values = [core_credits, elective_credits, other_credits]
@@ -1510,7 +988,6 @@ class DataVisualizer:
             return None
     def create_course_rigor_visualization(self, parsed_data: Dict):
-        """Visualization of course rigor analysis"""
         try:
             rigor = academic_analyzer.analyze_course_rigor(parsed_data)
@@ -1559,7 +1036,7 @@ class DataVisualizer:
 # Initialize visualizer
 data_visualizer = DataVisualizer()
-# ========== ENHANCED PROFILE MANAGEMENT ==========
 class EnhancedProfileManager:
     def __init__(self):
         self.profiles_dir = Path(PROFILES_DIR)
@@ -1581,7 +1058,6 @@ class EnhancedProfileManager:
                     movie: str, movie_reason: str, show: str, show_reason: str,
                     book: str, book_reason: str, character: str, character_reason: str,
                     blog: str, study_plan: Dict = None) -> str:
-        """Enhanced profile saving with encryption and validation"""
         try:
             name = validate_name(name)
             age = validate_age(age)
@@ -1595,7 +1071,6 @@ class EnhancedProfileManager:
             if not learning_style or "Your primary learning style is" not in learning_style:
                 raise ValueError("Please complete the learning style quiz first.")
-            # Prepare favorites with sanitization
             favorites = {
                 "movie": sanitize_input(movie),
                 "movie_reason": sanitize_input(movie_reason),
@@ -1607,7 +1082,6 @@ class EnhancedProfileManager:
                 "character_reason": sanitize_input(character_reason)
             }
-            # Generate study plan if not provided
             if not study_plan:
                 learning_style_match = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*", learning_style)
                 if learning_style_match:
@@ -1615,30 +1089,27 @@ class EnhancedProfileManager:
                         transcript,
                         learning_style_match.group(1))
-            # Prepare data with encryption for sensitive fields
             data = {
                 "name": self.encryptor.encrypt(name),
                 "age": age,
                 "interests": self.encryptor.encrypt(sanitize_input(interests)),
-                "transcript": transcript,  # Already sanitized during parsing
                 "learning_style": learning_style,
                 "favorites": favorites,
                 "blog": self.encryptor.encrypt(sanitize_input(blog)) if blog else "",
                 "study_plan": study_plan if study_plan else {},
                 "session_token": self.current_session,
                 "last_updated": time.time(),
-                "version": "2.0"  # Profile version for compatibility
             }
             filepath = self.get_profile_path(name)
-            # Save with atomic write
             temp_path = filepath.with_suffix('.tmp')
             with open(temp_path, "w", encoding='utf-8') as f:
                 json.dump(data, f, indent=2, ensure_ascii=False)
-            temp_path.replace(filepath)  # Atomic replace
-            # Optional cloud backup
             if HF_TOKEN and hf_api:
                 try:
                     hf_api.upload_file(
@@ -1658,7 +1129,6 @@ class EnhancedProfileManager:
             raise gr.Error(f"Couldn't save profile: {str(e)}")
     def load_profile(self, name: str = None, session_token: str = None) -> Dict:
-        """Enhanced profile loading with decryption and retries"""
         for attempt in range(MAX_PROFILE_LOAD_ATTEMPTS):
             try:
                 if session_token:
@@ -1673,7 +1143,6 @@ class EnhancedProfileManager:
                 if name:
                     profile_file = self.get_profile_path(name)
                     if not profile_file.exists():
-                        # Try to download from Hugging Face Hub
                         if HF_TOKEN and hf_api:
                             try:
                                 hf_api.download_file(
@@ -1688,18 +1157,15 @@ class EnhancedProfileManager:
                         else:
                             raise gr.Error(f"No profile found for {name}")
                 else:
-                    # Load most recently modified profile
                     profiles.sort(key=lambda x: x.stat().st_mtime, reverse=True)
                     profile_file = profiles[0]
                 with open(profile_file, "r", encoding='utf-8') as f:
                     profile_data = json.load(f)
-                # Check session timeout
                 if time.time() - profile_data.get('last_updated', 0) > SESSION_TIMEOUT:
                     raise gr.Error("Session expired. Please start a new session.")
-                # Decrypt encrypted fields
                 if profile_data.get('version', '1.0') == '2.0':
                     try:
                         profile_data['name'] = self.encryptor.decrypt(profile_data['name'])
@@ -1723,7 +1189,6 @@ class EnhancedProfileManager:
                 time.sleep(0.5 * (attempt + 1))
     def list_profiles(self, session_token: str = None) -> List[str]:
-        """List available profiles with decrypted names"""
         if session_token:
             profiles = list(self.profiles_dir.glob(f"*{session_token}_profile.json"))
         else:
@@ -1748,22 +1213,18 @@ class EnhancedProfileManager:
         return profile_names
     def delete_profile(self, name: str, session_token: str = None) -> bool:
-        """Delete a profile with verification"""
         try:
             profile_file = self.get_profile_path(name)
             if not profile_file.exists():
                 return False
-            # Verify the profile belongs to the current session
             with open(profile_file, "r", encoding='utf-8') as f:
                 data = json.load(f)
                 if session_token and data.get('session_token') != session_token:
                     return False
-            # Delete local file
             profile_file.unlink()
-            # Try to delete from Hugging Face Hub
             if HF_TOKEN and hf_api:
                 try:
                     hf_api.delete_file(
@@ -1779,10 +1240,10 @@ class EnhancedProfileManager:
             logger.error(f"Error deleting profile: {str(e)}")
             return False
-# Initialize enhanced profile manager
 profile_manager = EnhancedProfileManager()
-# ========== ENHANCED AI TEACHING ASSISTANT ==========
 class EnhancedTeachingAssistant:
     def __init__(self):
         self.context_history = []
@@ -1791,14 +1252,12 @@ class EnhancedTeachingAssistant:
         self.last_model_load_attempt = 0
     async def initialize_model(self):
         """Load the model and tokenizer if either one is still missing.

         A load is attempted only when more than 3600 seconds have elapsed
         since ``last_model_load_attempt``; that timestamp is refreshed once
         the loader has returned.
         """
         already_loaded = self.model and self.tokenizer
         if already_loaded:
             return

         seconds_since_attempt = time.time() - self.last_model_load_attempt
         if seconds_since_attempt <= 3600:
             # Too soon after the previous attempt; wait for the next window.
             return

         self.model, self.tokenizer = get_model_and_tokenizer()
         self.last_model_load_attempt = time.time()
     async def generate_response(self, message: str, history: List[List[Union[str, None]]], session_token: str) -> str:
-        """Enhanced response generation with context awareness"""
         try:
             await self.initialize_model()
@@ -1808,28 +1267,24 @@ class EnhancedTeachingAssistant:
             self._update_context(message, history)
-            # Get relevant profile information
             student_name = profile.get('name', 'Student')
             gpa = profile.get('transcript', {}).get('student_info', {}).get('weighted_gpa', None)
             learning_style = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*",
                                       profile.get('learning_style', ''))
             learning_style = learning_style.group(1) if learning_style else None
-            # Prepare context for the model
             context = f"You are an AI teaching assistant helping {student_name}. "
             if gpa:
                 context += f"{student_name}'s current weighted GPA is {gpa}. "
             if learning_style:
                 context += f"They are a {learning_style.lower()} learner. "
-            # Add recent conversation history
             if self.context_history:
                 context += "Recent conversation:\n"
                 for item in self.context_history[-self.max_context_length:]:
                     role = "Student" if item['role'] == 'user' else "Assistant"
                     context += f"{role}: {item['content']}\n"
-            # Generate response based on query type
             query_type = self._classify_query(message)
             response = await self._generate_typed_response(query_type, message, context, profile)
@@ -1840,7 +1295,6 @@ class EnhancedTeachingAssistant:
             return "I encountered an error processing your request. Please try again."
     def _classify_query(self, message: str) -> str:
-        """Classify the type of user query"""
         message_lower = message.lower()
         if any(word in message_lower for word in ['gpa', 'grade', 'average']):
@@ -1859,7 +1313,6 @@ class EnhancedTeachingAssistant:
             return 'general'
     async def _generate_typed_response(self, query_type: str, message: str, context: str, profile: Dict) -> str:
-        """Generate response based on query type"""
         if query_type == 'gpa':
             return self._generate_gpa_response(profile)
         elif query_type == 'study':
@@ -1876,7 +1329,6 @@ class EnhancedTeachingAssistant:
             return await self._generate_general_response(message, context)
     def _generate_gpa_response(self, profile: Dict) -> str:
-        """Generate response about GPA"""
         gpa = profile.get('transcript', {}).get('student_info', {}).get('weighted_gpa', None)
         if not gpa:
             return "I couldn't find your GPA information. Please upload your transcript first."
@@ -1902,7 +1354,6 @@ class EnhancedTeachingAssistant:
         return "\n\n".join(response)
     def _generate_study_response(self, profile: Dict) -> str:
-        """Generate study advice based on learning style"""
         learning_style_match = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*",
                                        profile.get('learning_style', ''))
         if not learning_style_match:
@@ -1918,7 +1369,6 @@ class EnhancedTeachingAssistant:
         if study_plan.get('study_strategies'):
             response.extend([f"- {strategy}" for strategy in study_plan['study_strategies']])
         else:
-            # Fallback if no study plan
             if learning_style.lower() == 'visual':
                 response.extend([
                     "- Use color coding in your notes",
@@ -1951,18 +1401,15 @@ class EnhancedTeachingAssistant:
         return "\n\n".join(response)
     def _generate_courses_response(self, profile: Dict) -> str:
-        """Generate response about current/past courses"""
         transcript = profile.get('transcript', {})
         if not transcript.get('course_history'):
             return "I couldn't find your course information. Please upload your transcript first."
-        # Get current courses (in progress)
         current_courses = [
             course for course in transcript['course_history']
             if course.get('status', '').lower() == 'in progress'
         ]
-        # Get past completed courses
         completed_courses = [
             course for course in transcript['course_history']
             if course.get('status', '').lower() == 'completed'
@@ -1972,7 +1419,7 @@ class EnhancedTeachingAssistant:
         if current_courses:
             response.append("**Your Current Courses:**")
-            for course in current_courses[:5]:  # Limit to 5 courses
                 response.append(
                     f"- {course.get('description', 'Unknown')} "
                     f"({course.get('course_code', '')})"
@@ -1982,7 +1429,7 @@ class EnhancedTeachingAssistant:
         if completed_courses:
             response.append("\n**Recently Completed Courses:**")
-            for course in completed_courses[:5]:  # Limit to 5 courses
                 grade = course.get('grade_earned', '')
                 if grade:
                     response.append(
@@ -1992,7 +1439,6 @@ class EnhancedTeachingAssistant:
                 else:
                     response.append(f"- {course.get('description', 'Unknown')}")
-        # Add rigor analysis
         rigor = academic_analyzer.analyze_course_rigor(transcript)
         if rigor['rating']:
             response.append(f"\n**Course Rigor Analysis:** {rigor['rating']}")
@@ -2003,7 +1449,6 @@ class EnhancedTeachingAssistant:
         return "\n".join(response)
     def _generate_college_response(self, profile: Dict) -> str:
-        """Generate college recommendations"""
         recommendations = academic_analyzer.generate_college_recommendations(profile.get('transcript', {}))
         response = ["**College Recommendations Based on Your Profile:**"]
@@ -2031,7 +1476,6 @@ class EnhancedTeachingAssistant:
         return "\n".join(response)
     def _generate_planning_response(self, profile: Dict) -> str:
-        """Generate study/schedule planning advice"""
         study_plan = profile.get('study_plan', {})
         response = ["**Study Planning Advice:**"]
@@ -2041,7 +1485,7 @@ class EnhancedTeachingAssistant:
             for day, activities in study_plan['weekly_schedule'].items():
                 if activities:
                     response.append(f"\n**{day}:**")
-                    for activity in activities[:2]:  # Show 2 activities per day max
                         response.append(
                             f"- {activity.get('course', 'Course')}: "
                             f"{activity.get('duration', '45-60 minutes')}"
@@ -2059,23 +1503,20 @@ class EnhancedTeachingAssistant:
         return "\n".join(response)
     def _generate_resources_response(self, profile: Dict) -> str:
-        """Generate resource recommendations"""
         study_plan = profile.get('study_plan', {})
         transcript = profile.get('transcript', {})
         response = ["**Recommended Learning Resources:**"]
-        # General resources
         if study_plan.get('resource_recommendations'):
             response.extend([f"- {resource}" for resource in study_plan['resource_recommendations'][:3]])
         else:
             response.extend([
-                "- Khan Academy (free lessons on many subjects)",
-                "- Quizlet (flashcards and study tools)",
                 "- Wolfram Alpha for math help"
             ])
-        # Subject-specific resources
         current_courses = [
             course for course in transcript.get('course_history', [])
             if course.get('status', '').lower() == 'in progress'
@@ -2083,7 +1524,7 @@ class EnhancedTeachingAssistant:
         if current_courses:
             response.append("\n**Course-Specific Resources:**")
-            for course in current_courses[:2]:  # Limit to 2 courses
                 course_name = course.get('description', 'your course')
                 if 'MATH' in course_name.upper():
                     response.append(f"- For {course_name}: Desmos Graphing Calculator, Art of Problem Solving")
@@ -2095,7 +1536,6 @@ class EnhancedTeachingAssistant:
         return "\n".join(response)
     async def _generate_general_response(self, message: str, context: str) -> str:
-        """Generate response using the language model"""
         if not self.model or not self.tokenizer:
             return "I'm still loading my knowledge base. Please try again in a moment."
@@ -2104,7 +1544,6 @@ class EnhancedTeachingAssistant:
             inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
-            # Generate response with more controlled parameters
             outputs = self.model.generate(
                 **inputs,
                 max_new_tokens=200,
@@ -2116,10 +1555,8 @@ class EnhancedTeachingAssistant:
             response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-            # Extract just the assistant's response
             response = response[len(prompt):].strip()
-            # Clean up any incomplete sentences
             if response and response[-1] not in {'.', '!', '?'}:
                 last_period = response.rfind('.')
                 if last_period > 0:
@@ -2131,7 +1568,6 @@ class EnhancedTeachingAssistant:
             return "I encountered an error generating a response. Please try again."
     def _update_context(self, message: str, history: List[List[Union[str, None]]]) -> None:
-        """Update conversation context"""
         self.context_history.append({"role": "user", "content": message})
         if history:
@@ -2141,290 +1577,97 @@ class EnhancedTeachingAssistant:
                 if h[1]:
                     self.context_history.append({"role": "assistant", "content": h[1]})
-        # Trim to max context length
         self.context_history = self.context_history[-(self.max_context_length * 2):]
-# Initialize enhanced teaching assistant
 teaching_assistant = EnhancedTeachingAssistant()
-# ========== STUDY CALENDAR INTEGRATION ==========
class StudyCalendar:
    """Build and plot a multi-week study calendar from a student profile."""

    # Weekday names indexed by ``date.weekday()`` (0 = Monday). These are the
    # keys looked up in ``profile['study_plan']['weekly_schedule']``.
    WEEKDAY_NAMES = (
        'Monday', 'Tuesday', 'Wednesday', 'Thursday',
        'Friday', 'Saturday', 'Sunday',
    )

    def __init__(self):
        self.calendar_events = {}

    def generate_study_calendar(self, profile: Dict, start_date: str = None, weeks: int = 4) -> Dict:
        """Generate a study calendar for the given profile.

        Args:
            profile: Profile dict; ``profile['study_plan']['weekly_schedule']``
                and ``profile['transcript']['course_history']`` are read when
                present.
            start_date: ISO ``YYYY-MM-DD`` string; defaults to today.
            weeks: Number of weeks the calendar spans.

        Returns:
            A dict with ``start_date``, ``end_date``, ``events``, ``exams`` and
            ``assignments``. If anything fails, the error is logged and an
            empty four-week calendar starting today is returned.
        """
        try:
            if not start_date:
                start_date = datetime.date.today().isoformat()
            first_day = datetime.date.fromisoformat(start_date)

            study_plan = profile.get('study_plan', {})

            # Named ``schedule`` (not ``calendar``) so it can never be confused
            # with the stdlib ``calendar`` module; the previous local of that
            # name made the weekday-name lookup fail on every call.
            schedule = {
                'start_date': first_day.isoformat(),
                'end_date': (first_day + datetime.timedelta(weeks=weeks)).isoformat(),
                'events': [],
                'exams': [],
                'assignments': []
            }

            # Add study sessions from the study plan
            weekly_schedule = study_plan.get('weekly_schedule')
            if weekly_schedule:
                for day_offset in range(weeks * 7):
                    current_date = first_day + datetime.timedelta(days=day_offset)
                    day_name = self.WEEKDAY_NAMES[current_date.weekday()]

                    if day_name in weekly_schedule:
                        for session in weekly_schedule[day_name]:
                            schedule['events'].append({
                                'date': current_date.isoformat(),
                                'title': f"Study {session.get('course', '')}",
                                'description': "\n".join(session.get('activities', [])),
                                'duration': session.get('duration', '45-60 minutes'),
                                'type': 'study'
                            })

            # Add exam dates for in-progress courses. The dates are simulated
            # placeholders (a real app would read them from a school calendar),
            # identical for every course, so they are computed once.
            transcript = profile.get('transcript', {})
            if transcript.get('course_history'):
                midterm_date = (first_day + datetime.timedelta(weeks=2)).isoformat()
                final_date = (first_day + datetime.timedelta(weeks=weeks - 1)).isoformat()

                for course in transcript['course_history']:
                    if course.get('status', '').lower() != 'in progress':
                        continue
                    for exam_date, label in ((midterm_date, 'Midterm'), (final_date, 'Final')):
                        schedule['exams'].append({
                            'date': exam_date,
                            'title': f"{course.get('description', 'Course')} {label}",
                            'course': course.get('description', ''),
                            'type': 'exam'
                        })

            return schedule
        except Exception as e:
            logger.error(f"Error generating calendar: {str(e)}")
            return {
                'start_date': datetime.date.today().isoformat(),
                'end_date': (datetime.date.today() + datetime.timedelta(weeks=4)).isoformat(),
                'events': [],
                'exams': [],
                'assignments': []
            }

    def create_calendar_visualization(self, calendar_data: Dict) -> Optional["plt.Figure"]:
        """Create a visualization of the study calendar.

        Args:
            calendar_data: A dict shaped like the return value of
                ``generate_study_calendar``.

        Returns:
            A matplotlib figure, or ``None`` if drawing fails (the error is
            logged).
        """
        try:
            # A standalone Figure avoids pyplot's process-wide "current figure"
            # state and its registry of open figures, neither of which suits a
            # long-running server handling several requests.
            from matplotlib.figure import Figure
            from matplotlib.patches import Rectangle

            # Prepare data
            start_date = datetime.date.fromisoformat(calendar_data['start_date'])
            end_date = datetime.date.fromisoformat(calendar_data['end_date'])
            days = (end_date - start_date).days + 1

            # Create figure
            fig = Figure(figsize=(12, 6))
            ax = fig.subplots()

            # Draw week grid
            for week_start in range(0, days, 7):
                ax.add_patch(Rectangle((week_start, 0), 7, 1, color='#f5f5f5'))

            # Study sessions occupy the upper band, exams the lower band.
            bands = (
                ('events', 0.7, '#4CAF50', 'Study Sessions'),
                ('exams', 0.3, '#F44336', 'Exams'),
            )
            for key, bottom, color, _ in bands:
                for entry in calendar_data[key]:
                    entry_date = datetime.date.fromisoformat(entry['date'])
                    day_offset = (entry_date - start_date).days
                    ax.add_patch(Rectangle((day_offset, bottom), 1, 0.3, color=color))

            # Configure axes
            week_ticks = range(0, days, 7)
            ax.set_xlim(0, days)
            ax.set_ylim(0, 1)
            ax.set_xticks(week_ticks)
            ax.set_xticklabels([(start_date + datetime.timedelta(days=x)).strftime('%b %d')
                               for x in week_ticks])
            ax.set_yticks([0.5])
            ax.set_yticklabels(['Study Calendar'])

            # Add legend swatches near the right edge of the plot
            for _, bottom, color, label in bands:
                ax.add_patch(Rectangle((days-5, bottom), 1, 0.3, color=color))
                ax.text(days-3.5, bottom + 0.15, label, va='center')

            ax.set_title(f"Study Calendar: {start_date.strftime('%b %d')} to {end_date.strftime('%b %d')}")
            fig.tight_layout()

            return fig
        except Exception as e:
            logger.error(f"Error creating calendar visualization: {str(e)}")
            return None
-# Initialize study calendar
-study_calendar = StudyCalendar()
-# ========== GOAL TRACKING SYSTEM ==========
class GoalTracker:
    """In-memory store of student goals, keyed by a generated goal id."""

    def __init__(self):
        self.goals = {}

    def add_goal(self, profile_name: str, goal_type: str, description: str,
                target_date: str, target_value: float = None) -> bool:
        """Add a new goal for the student.

        The goal id is the first 16 hex characters of a SHA-256 digest over the
        profile name, goal type, description and the current time.

        Returns:
            ``True`` when the goal was stored, ``False`` if an error was logged.
        """
        try:
            goal_id = hashlib.sha256(f"{profile_name}{goal_type}{description}{time.time()}".encode()).hexdigest()[:16]
            self.goals[goal_id] = {
                'profile_name': profile_name,
                'type': goal_type,
                'description': description,
                'target_date': target_date,
                'target_value': target_value,
                'created': time.time(),
                'progress': [],
                'completed': False
            }
            return True
        except Exception as e:
            logger.error(f"Error adding goal: {str(e)}")
            return False

    def update_goal_progress(self, goal_id: str, progress_value: float, notes: str = "") -> bool:
        """Append a progress entry to a goal.

        The goal is marked completed when it has a target value and
        ``progress_value`` reaches or exceeds it.

        Returns:
            ``False`` if ``goal_id`` is unknown or an error was logged,
            otherwise ``True``.
        """
        try:
            goal = self.goals.get(goal_id)
            if goal is None:
                return False

            goal['progress'].append({
                'date': time.time(),
                'value': progress_value,
                'notes': notes
            })

            # Check if goal is completed
            target = goal.get('target_value')
            if target is not None and progress_value >= target:
                goal['completed'] = True

            return True
        except Exception as e:
            logger.error(f"Error updating goal: {str(e)}")
            return False

    def get_goals(self, profile_name: str) -> List[Dict]:
        """Return every goal stored for ``profile_name``, each with its ``id`` added."""
        return [
            {**goal, 'id': goal_id}
            for goal_id, goal in self.goals.items()
            if goal['profile_name'] == profile_name
        ]

    def create_goal_visualization(self, goals: List[Dict]) -> Optional["plt.Figure"]:
        """Create a grouped bar chart of current progress versus target.

        Args:
            goals: Goal dicts as returned by ``get_goals``.

        Returns:
            A matplotlib figure, or ``None`` when ``goals`` is empty or drawing
            fails (the error is logged).
        """
        # Nothing to draw: return before paying for the matplotlib import.
        if not goals:
            return None

        try:
            # A standalone Figure avoids pyplot's process-wide "current figure"
            # state and its registry of open figures, neither of which suits a
            # long-running server handling several requests.
            from matplotlib.figure import Figure

            # Prepare data
            goal_names = [goal['description'][:20] + ('...' if len(goal['description']) > 20 else '')
                         for goal in goals]
            progress_values = [
                goal['progress'][-1]['value'] if goal['progress'] else 0
                for goal in goals
            ]
            # A goal without a target is drawn with a target bar equal to its
            # current progress.
            target_values = [
                goal['target_value'] if goal['target_value'] is not None else current
                for goal, current in zip(goals, progress_values)
            ]

            # Create figure
            fig = Figure(figsize=(10, 6))
            ax = fig.subplots()

            # Plot bars
            x = range(len(goals))
            bar_width = 0.35

            progress_bars = ax.bar(
                [i - bar_width/2 for i in x],
                progress_values,
                bar_width,
                label='Current Progress',
                color='#4CAF50'
            )

            target_bars = ax.bar(
                [i + bar_width/2 for i in x],
                target_values,
                bar_width,
                label='Target',
                color='#2196F3'
            )

            # Add labels and title
            ax.set_xlabel('Goals')
            ax.set_ylabel('Progress')
            ax.set_title('Goal Progress Tracking')
            ax.set_xticks(x)
            ax.set_xticklabels(goal_names, rotation=45, ha='right')
            ax.legend()

            # Add value labels above every bar in both series
            for bar in (*progress_bars, *target_bars):
                height = bar.get_height()
                ax.annotate(f'{height:.1f}',
                            xy=(bar.get_x() + bar.get_width() / 2, height),
                            xytext=(0, 3),
                            textcoords="offset points",
                            ha='center', va='bottom')

            fig.tight_layout()
            return fig
        except Exception as e:
            logger.error(f"Error creating goal visualization: {str(e)}")
            return None
-# Initialize goal tracker
-goal_tracker = GoalTracker()
-# ========== ENHANCED GRADIO INTERFACE ==========
 def create_enhanced_interface():
     with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
         session_token = gr.State(value=generate_session_token())
         profile_manager.set_session(session_token.value)
         tab_completed = gr.State({
-            0: False,  # Transcript Upload
-            1: False,  # Learning Style Quiz
-            2: False,  # Personal Questions
-            3: False,  # Save & Review
-            4: False,  # AI Assistant
-            5: False   # Goals & Planning
         })
-        # Custom CSS with enhanced styling
         app.css = """
         .gradio-container {
             max-width: 1200px !important;
@@ -2537,7 +1780,6 @@ def create_enhanced_interface():
             border-left: 4px solid #2196F3;
         }
-        /* Dark mode styles */
         .dark .tab-content {
             background-color: #2d2d2d !important;
             border-color: #444 !important;
@@ -2579,7 +1821,6 @@ def create_enhanced_interface():
         }
         """
-        # Header with improved layout
         with gr.Row():
             with gr.Column(scale=4):
                 gr.Markdown("""
@@ -2590,7 +1831,6 @@ def create_enhanced_interface():
             with gr.Column(scale=1):
                 dark_mode = gr.Checkbox(label="Dark Mode", value=False)
-        # Navigation buttons with icons
         with gr.Row():
             with gr.Column(scale=1, min_width=100):
                 step1 = gr.Button("📄 1. Transcript", elem_classes="incomplete-tab")
@@ -2607,9 +1847,7 @@ def create_enhanced_interface():
         nav_message = gr.HTML(visible=False)
-        # Main tabs
         with gr.Tabs(visible=True) as tabs:
-            # ===== TAB 1: TRANSCRIPT UPLOAD =====
             with gr.Tab("Transcript", id=0):
                 with gr.Row():
                     with gr.Column(scale=1):
@@ -2650,15 +1888,12 @@ def create_enhanced_interface():
                 def process_and_visualize(file_obj, tab_status):
                     try:
-                        # Parse transcript with enhanced parser
-                        parsed_data = transcript_parser.parse_transcript(file_obj.name, os.path.splitext(file_obj.name)[1].lower())
-                        # Generate analyses
                         gpa_analysis = academic_analyzer.analyze_gpa(parsed_data)
                         grad_status = academic_analyzer.analyze_graduation_status(parsed_data)
                         college_recs = academic_analyzer.generate_college_recommendations(parsed_data)
-                        # Format results
                         results = [
                             f"## 📊 GPA Analysis",
                             f"**Rating:** {gpa_analysis['rating']}",
@@ -2688,7 +1923,6 @@ def create_enhanced_interface():
                             results.append("\n**Improvement Tips:**")
                             results.extend([f"- {tip}" for tip in gpa_analysis['improvement_tips']])
-                        # Update visualizations
                         viz_updates = [
                             gr.update(visible=data_visualizer.create_gpa_visualization(parsed_data) is not None),
                             gr.update(visible=data_visualizer.create_requirements_visualization(parsed_data) is not None),
@@ -2696,7 +1930,6 @@ def create_enhanced_interface():
                             gr.update(visible=data_visualizer.create_course_rigor_visualization(parsed_data) is not None)
                         ]
-                        # Update tab completion status
                         tab_status[0] = True
                         return "\n".join(results), parsed_data, *viz_updates, tab_status
@@ -2717,7 +1950,6 @@ def create_enhanced_interface():
                     outputs=step2
                 )
-            # ===== TAB 2: LEARNING STYLE QUIZ =====
             with gr.Tab("Learning Style Quiz", id=1):
                 with gr.Column():
                     gr.Markdown("### 📝 Step 2: Discover Your Learning Style")
@@ -2783,7 +2015,6 @@ def create_enhanced_interface():
                     outputs=progress
                 )
-            # ===== TAB 3: PERSONAL QUESTIONS =====
             with gr.Tab("Personal Profile", id=2):
                 with gr.Row():
                     with gr.Column(scale=1):
@@ -2829,7 +2060,6 @@ def create_enhanced_interface():
                     outputs=[tab_completed, step3, step4, save_confirmation]
                 )
-            # ===== TAB 4: SAVE & REVIEW =====
             with gr.Tab("Save Profile", id=3):
                 with gr.Row():
                     with gr.Column(scale=1):
@@ -2929,12 +2159,10 @@ def create_enhanced_interface():
                     ]
                 )
-            # ===== TAB 5: AI ASSISTANT =====
             with gr.Tab("AI Assistant", id=4):
                 gr.Markdown("## 💬 Your Personalized Learning Assistant")
                 gr.Markdown("Ask me anything about studying, your courses, grades, or learning strategies.")
-                # Create custom chatbot interface
                 chatbot = gr.Chatbot(height=500)
                 msg = gr.Textbox(label="Your Message")
                 clear = gr.Button("Clear")
@@ -2947,7 +2175,6 @@ def create_enhanced_interface():
                 msg.submit(respond, [msg, chatbot], [msg, chatbot])
                 clear.click(lambda: None, None, chatbot, queue=False)
-            # ===== TAB 6: GOALS & PLANNING =====
             with gr.Tab("Goals & Planning", id=5):
                 with gr.Row():
                     with gr.Column(scale=1):
@@ -2976,7 +2203,6 @@ def create_enhanced_interface():
                         calendar_output = gr.HTML()
                         calendar_viz = gr.Plot(label="Calendar Visualization", visible=False)
-                # Show/hide target value based on goal type
                 goal_type.change(
                     fn=lambda gt: gr.update(visible=gt in ["GPA Improvement", "Test Score"]),
                     inputs=goal_type,
@@ -3029,7 +2255,6 @@ def create_enhanced_interface():
                     calendar = study_calendar.generate_study_calendar(profile, start_date.isoformat())
-                    # Create HTML display
                     calendar_html = []
                     current_date = datetime.date.fromisoformat(calendar['start_date'])
                     end_date = datetime.date.fromisoformat(calendar['end_date'])
@@ -3073,7 +2298,6 @@ def create_enhanced_interface():
                         gr.update(visible=study_calendar.create_calendar_visualization(calendar) is not None)
                     )
-                # Add goal functionality
                 add_goal_btn.click(
                     fn=lambda gt, desc, date, val: (
                         goal_tracker.add_goal(name.value, gt, desc, date, val),
@@ -3091,16 +2315,13 @@ def create_enhanced_interface():
                     outputs=[goals_output, goal_viz]
                 )
-                # Generate calendar functionality
                 generate_calendar_btn.click(
                     fn=lambda date: update_calendar_display(name.value, date),
                     inputs=calendar_start_date,
                     outputs=[calendar_output, calendar_viz]
                 )
-        # Navigation logic
         def navigate_to_tab(tab_index: int, tab_completed_status: dict):
-            # Check if all previous tabs are completed
             for i in range(tab_index):
                 if not tab_completed_status.get(i, False):
                     messages = [
@@ -3111,7 +2332,7 @@ def create_enhanced_interface():
                         "Please complete the previous steps first"
                     ]
                     return (
-                        gr.Tabs(selected=i),  # Go to first incomplete tab
                         gr.update(
                             value=f"<div class='error-message'>⛔ {messages[i]}</div>",
                             visible=True
@@ -3151,7 +2372,6 @@ def create_enhanced_interface():
             outputs=[tabs, nav_message]
         )
-        # Dark mode toggle
         def toggle_dark_mode(dark):
             return gr.themes.Soft(primary_hue="blue", secondary_hue="gray") if not dark else gr.themes.Soft(primary_hue="blue", secondary_hue="gray", neutral_hue="slate")
@@ -3161,7 +2381,6 @@ def create_enhanced_interface():
             outputs=None
         )
-        # Load model on startup
         app.load(fn=lambda: model_loader.load_model(), outputs=[])
     return app

 # Enhanced Configuration
 PROFILES_DIR = "student_profiles"
 ALLOWED_FILE_TYPES = [".pdf", ".png", ".jpg", ".jpeg"]
+MAX_FILE_SIZE_MB = 10
 MIN_AGE = 5
 MAX_AGE = 120
 SESSION_TOKEN_LENGTH = 32
 HF_TOKEN = os.getenv("HF_TOKEN")
 ENCRYPTION_KEY = os.getenv("ENCRYPTION_KEY", Fernet.generate_key().decode())
+SESSION_TIMEOUT = 3600 * 3
 MAX_CONTEXT_HISTORY = 10
 MAX_PROFILE_LOAD_ATTEMPTS = 3
+# Initialize logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 )
 logger = logging.getLogger(__name__)
+# Model configuration
+MODEL_NAME = "deepseek-ai/deepseek-llm-7b"
+# Initialize Hugging Face API
 if HF_TOKEN:
     hf_api = None
     for attempt in range(3):
             break
         except Exception as e:
             logger.error(f"Attempt {attempt + 1} failed to initialize Hugging Face API: {str(e)}")
+            time.sleep(2 ** attempt)
 # ========== LEARNING STYLE QUIZ ==========
 class LearningStyleQuiz:
             'kinesthetic': 0
         }
         for answer in answers:
             if answer.startswith("See") or answer.startswith("Draw") or answer.startswith("Watch") or "diagram" in answer.lower():
                 style_counts['visual'] += 1
         primary_style = max(style_counts, key=style_counts.get)
         secondary_styles = sorted(style_counts.items(), key=lambda x: x[1], reverse=True)[1:3]
         result = [
             "## 🎯 Your Learning Style Results",
             f"Your primary learning style is **{primary_style.capitalize()}**",
 # Initialize learning style quiz
 learning_style_quiz = LearningStyleQuiz()
+# ========== MODEL LOADER ==========
 class ModelLoader:
     def __init__(self):
         self.model = None
         self.max_retries = 3
     def load_model(self, progress: gr.Progress = None) -> Tuple[Optional[AutoModelForCausalLM], Optional[AutoTokenizer]]:
         if self.loaded:
             return self.model, self.tokenizer
             if progress:
                 progress(0.1, desc="Initializing model environment...")
             if self.device == "cuda":
                 torch.cuda.empty_cache()
                 torch.cuda.reset_peak_memory_stats()
             if progress:
                 progress(0.2, desc="Loading tokenizer...")
             tokenizer = None
             for attempt in range(3):
                 try:
             if progress:
                 progress(0.5, desc="Loading model (this may take a few minutes)...")
             model_kwargs = {
                 "trust_remote_code": True,
                 "torch_dtype": torch.float16 if self.device == "cuda" else torch.float32,
                 "offload_folder": "offload"
             }
             if torch.cuda.device_count() > 1:
                 model_kwargs["max_memory"] = {i: "20GiB" for i in range(torch.cuda.device_count())}
                     logger.warning(f"Model loading attempt {attempt + 1} failed: {str(e)}")
                     time.sleep(2 ** attempt)
             if progress:
                 progress(0.8, desc="Verifying model...")
             test_input = tokenizer("Test", return_tensors="pt").to(self.device)
 def get_model_and_tokenizer():
     """Return the ``(model, tokenizer)`` pair obtained from the module-level ``model_loader``."""
     model_and_tokenizer = model_loader.load_model()
     return model_and_tokenizer
+# ========== TRANSCRIPT PARSER ==========
class MiamiDadeTranscriptParser:
    """Regex-based parser for the text of a Miami-Dade County transcript PDF.

    The transcript text is expected to contain pipe-delimited rows. Three
    kinds of rows are recognised: a single student-information row,
    graduation-requirement rows, and course-history rows.
    """

    def __init__(self):
        # Student header row: id - name | grade | YOG | weighted GPA |
        # community service date | total credits (7 capture groups).
        self.student_info_pattern = re.compile(
            r"(\d{7}) - (.*?)\s*\|\s*Current Grade:\s*(\d+)\s*\|\s*YOG\s*(\d{4})"
            r"\s*\|\s*Weighted GPA\s*([\d.]+)\s*\|\s*Comm Serv Date\s*(\d{2}/\d{2}/\d{4})"
            r"\s*\|\s*Total Credits Earned\s*([\d.]+)"
        )
        # Requirement row: code | description | required | waived |
        # completed | percent% (6 capture groups).
        self.requirement_pattern = re.compile(
            r"([A-Z]-[A-Za-z ]+)\s*\|\s*([^|]+)\|\s*([\d.]+)\s*\|\s*([\d.]+)\s*\|\s*([\d.]+)\s*\|\s*([^|]+)%"
        )
        # Course row: 10 capture groups; the last one is the credits column,
        # which is either a number or the literal "inProgress".
        self.course_pattern = re.compile(
            r"([A-Z]-[A-Za-z ]+)\s*\|\s*(\d{4}-\d{4})\s*\|\s*(\d{2})\s*\|\s*([A-Z0-9]+)\s*\|\s*([^|]+)\|"
            r"\s*([A-Z0-9])\s*\|\s*(\d+)\s*\|\s*([A-Z])\s*\|\s*([A-Z])\s*\|\s*([\d.]+|inProgress)"
        )

    def parse_transcript(self, file_path: str) -> Dict:
        """Parse Miami-Dade County transcript PDF.

        Args:
            file_path: Path of the PDF file to open with ``pdfplumber``.

        Returns:
            A dict with the keys ``'student_info'`` (dict), ``'requirements'``
            (dict keyed by requirement code) and ``'course_history'`` (list
            of course dicts).
        """
        with pdfplumber.open(file_path) as pdf:
            # A page without extractable text can yield None instead of a
            # string; substitute "" so the join cannot raise TypeError.
            text = "\n".join(page.extract_text() or "" for page in pdf.pages)
        return {
            'student_info': self._parse_student_info(text),
            'requirements': self._parse_requirements(text),
            'course_history': self._parse_courses(text)
        }

    def _parse_student_info(self, text: str) -> Dict:
        """Extract student information.

        Returns an empty dict when the student header row is not found.
        """
        match = self.student_info_pattern.search(text)
        if not match:
            return {}
        return {
            'id': match.group(1),
            'name': match.group(2).strip(),
            'grade': match.group(3),
            'year_of_graduation': match.group(4),
            'weighted_gpa': float(match.group(5)),
            'community_service_date': match.group(6),
            'total_credits': float(match.group(7)),
            'district': 'Miami-Dade'
        }

    def _parse_requirements(self, text: str) -> Dict:
        """Parse graduation requirements section.

        Returns a dict mapping each requirement code (e.g. ``"A-English"``)
        to its description and numeric required/waived/completed/percent
        values. Each entry also carries its own ``'code'`` so consumers that
        iterate over ``.values()`` and read ``req.get('code')`` can see it.
        """
        requirements = {}
        for match in self.requirement_pattern.finditer(text):
            code = match.group(1).strip()
            requirements[code] = {
                'code': code,
                'description': match.group(2).strip(),
                'required': float(match.group(3)),
                'waived': float(match.group(4)),
                'completed': float(match.group(5)),
                'percent_complete': float(match.group(6))
            }
        return requirements

    def _parse_courses(self, text: str) -> List[Dict]:
        """Parse course history section.

        Returns one dict per matched course row. ``'credits'`` is ``0`` and
        ``'status'`` is ``'In Progress'`` when the credits column reads
        ``inProgress``; otherwise the credits are parsed as a float and the
        status is ``'Completed'``.
        """
        courses = []
        for match in self.course_pattern.finditer(text):
            # The credits column is the tenth (last) capture group. Group 9 is
            # a single-letter column and can never be parsed as a number.
            raw_credits = match.group(10)
            in_progress = raw_credits == 'inProgress'
            courses.append({
                'requirement': match.group(1).strip(),
                'school_year': match.group(2),
                'grade_level': match.group(3),
                'course_code': match.group(4),
                'description': match.group(5).strip(),
                'term': match.group(6),
                'district_number': match.group(7),
                # NOTE(review): groups 8 and 9 are both single-letter columns;
                # only group 8 is exported here. Confirm against a real
                # transcript which one is the "included" flag.
                'included': match.group(8),
                'credits': 0 if in_progress else float(raw_credits),
                'status': 'In Progress' if in_progress else 'Completed'
            })
        return courses
+# Initialize transcript parser
+transcript_parser = MiamiDadeTranscriptParser()
+# ========== ACADEMIC ANALYZER ==========
 class AcademicAnalyzer:
     def __init__(self):
         self.gpa_scale = {
         }
     def analyze_gpa(self, parsed_data: Dict) -> Dict:
         analysis = {
             'rating': '',
             'description': '',
                     "Focus on fundamental study skills"
                 ]
             if weighted_gpa > 0 and unweighted_gpa > 0:
                 diff = weighted_gpa - unweighted_gpa
                 if diff > 0.5:
             }
     def analyze_graduation_status(self, parsed_data: Dict) -> Dict:
         analysis = {
             'status': '',
             'completion_percentage': 0,
             analysis['completion_percentage'] = (total_completed / total_required) * 100 if total_required > 0 else 0
             analysis['missing_requirements'] = [
                 {
                     'code': code,
                 if req and float(req.get('completed', 0)) < float(req.get('required', 0))
             ]
             current_grade = parsed_data.get('student_info', {}).get('grade', '')
             grad_year = parsed_data.get('student_info', {}).get('year_of_graduation', '')
                 analysis['status'] = f"❌ You've only completed {analysis['completion_percentage']:.1f}% of requirements. Immediate action needed."
                 analysis['on_track'] = False
             if current_grade and grad_year:
                 remaining_credits = total_required - total_completed
                 years_remaining = int(grad_year) - datetime.datetime.now().year - int(current_grade)
             }
     def analyze_course_rigor(self, parsed_data: Dict) -> Dict:
         analysis = {
             'advanced_courses': 0,
             'honors_courses': 0,
             }
     def generate_college_recommendations(self, parsed_data: Dict) -> Dict:
         recommendations = {
             'reach': [],
             'target': [],
         }
         try:
             weighted_gpa = float(parsed_data.get('student_info', {}).get('weighted_gpa', 0))
             rigor_analysis = self.analyze_course_rigor(parsed_data)
             service_hours = int(parsed_data.get('student_info', {}).get('community_service_hours', 0))
             if weighted_gpa >= 4.3 and rigor_analysis['advanced_courses'] >= 8 and service_hours >= 100:
                 recommendations['reach'].extend([
                     "Ivy League: Harvard, Yale, Princeton, Columbia, etc.",
                     "Technical Schools"
                 ])
             if weighted_gpa >= 4.0:
                 recommendations['scholarships'].extend([
                     "National Merit Scholarship",
                     "First-Generation Student Programs"
                 ])
             if weighted_gpa < 3.5:
                 recommendations['improvement_areas'].append("Improve GPA through focused study and tutoring")
             if rigor_analysis['advanced_courses'] < 4:
             }
     def generate_study_plan(self, parsed_data: Dict, learning_style: str) -> Dict:
         plan = {
             'weekly_schedule': {},
             'study_strategies': [],
         }
         try:
             current_courses = [
                 course for course in parsed_data.get('course_history', [])
                 if course.get('status', '').lower() == 'in progress'
             ]
             days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
             for day in days:
                 plan['weekly_schedule'][day] = []
+            study_blocks = 2
             if learning_style.lower() == 'visual':
                 study_blocks = 3
                 plan['study_strategies'].extend([
                     "Use hands-on activities when possible"
                 ])
             for i, course in enumerate(current_courses):
+                day_index = i % 5
                 day = days[day_index]
                 plan['weekly_schedule'][day].append({
                     'course': course.get('description', 'Course'),
                     ]
                 })
             plan['time_management_tips'].extend([
                 "Use the Pomodoro technique (25 min study, 5 min break)",
                 "Prioritize assignments by due date and importance",
                 "Schedule regular review sessions"
             ])
             plan['resource_recommendations'].extend([
                 "Khan Academy for math and science",
                 "Quizlet for flashcards",
 # Initialize academic analyzer
 academic_analyzer = AcademicAnalyzer()
+# ========== DATA VISUALIZER ==========
 class DataVisualizer:
     def __init__(self):
         self.color_palette = {
         }
     def create_gpa_visualization(self, parsed_data: Dict):
         try:
             gpa_data = {
                 "Type": ["Weighted GPA", "Unweighted GPA"],
                 hover_data={"Type": True, "Value": ":.2f"}
             )
             fig.add_hline(y=4.0, line_dash="dot", line_color="green", annotation_text="Excellent", annotation_position="top left")
             fig.add_hline(y=3.0, line_dash="dot", line_color="orange", annotation_text="Good", annotation_position="top left")
             fig.add_hline(y=2.0, line_dash="dot", line_color="red", annotation_text="Minimum", annotation_position="top left")
             return None
     def create_requirements_visualization(self, parsed_data: Dict):
         try:
             req_data = []
             for code, req in parsed_data.get('requirements', {}).items():
             return None
     def create_credits_distribution_visualization(self, parsed_data: Dict):
         try:
             core_credits = sum(
                 req['completed'] for req in parsed_data.get('requirements', {}).values()
+                if req and req.get('code', '').split('-')[0] in ['A', 'B', 'C', 'D']
             )
             elective_credits = sum(
                 req['completed'] for req in parsed_data.get('requirements', {}).values()
+                if req and req.get('code', '').split('-')[0] in ['G', 'H']
             )
             other_credits = sum(
                 req['completed'] for req in parsed_data.get('requirements', {}).values()
+                if req and req.get('code', '').split('-')[0] in ['E', 'F']
             )
             credit_values = [core_credits, elective_credits, other_credits]
             return None
     def create_course_rigor_visualization(self, parsed_data: Dict):
         try:
             rigor = academic_analyzer.analyze_course_rigor(parsed_data)
 # Initialize visualizer
 data_visualizer = DataVisualizer()
+# ========== PROFILE MANAGER ==========
 class EnhancedProfileManager:
     def __init__(self):
         self.profiles_dir = Path(PROFILES_DIR)
                     movie: str, movie_reason: str, show: str, show_reason: str,
                     book: str, book_reason: str, character: str, character_reason: str,
                     blog: str, study_plan: Dict = None) -> str:
         try:
             name = validate_name(name)
             age = validate_age(age)
             if not learning_style or "Your primary learning style is" not in learning_style:
                 raise ValueError("Please complete the learning style quiz first.")
             favorites = {
                 "movie": sanitize_input(movie),
                 "movie_reason": sanitize_input(movie_reason),
                 "character_reason": sanitize_input(character_reason)
             }
             if not study_plan:
                 learning_style_match = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*", learning_style)
                 if learning_style_match:
                         transcript,
                         learning_style_match.group(1))
             data = {
                 "name": self.encryptor.encrypt(name),
                 "age": age,
                 "interests": self.encryptor.encrypt(sanitize_input(interests)),
+                "transcript": transcript,
                 "learning_style": learning_style,
                 "favorites": favorites,
                 "blog": self.encryptor.encrypt(sanitize_input(blog)) if blog else "",
                 "study_plan": study_plan if study_plan else {},
                 "session_token": self.current_session,
                 "last_updated": time.time(),
+                "version": "2.0"
             }
             filepath = self.get_profile_path(name)
             temp_path = filepath.with_suffix('.tmp')
             with open(temp_path, "w", encoding='utf-8') as f:
                 json.dump(data, f, indent=2, ensure_ascii=False)
+            temp_path.replace(filepath)
             if HF_TOKEN and hf_api:
                 try:
                     hf_api.upload_file(
             raise gr.Error(f"Couldn't save profile: {str(e)}")
     def load_profile(self, name: str = None, session_token: str = None) -> Dict:
         for attempt in range(MAX_PROFILE_LOAD_ATTEMPTS):
             try:
                 if session_token:
                 if name:
                     profile_file = self.get_profile_path(name)
                     if not profile_file.exists():
                         if HF_TOKEN and hf_api:
                             try:
                                 hf_api.download_file(
                         else:
                             raise gr.Error(f"No profile found for {name}")
                 else:
                     profiles.sort(key=lambda x: x.stat().st_mtime, reverse=True)
                     profile_file = profiles[0]
                 with open(profile_file, "r", encoding='utf-8') as f:
                     profile_data = json.load(f)
                 if time.time() - profile_data.get('last_updated', 0) > SESSION_TIMEOUT:
                     raise gr.Error("Session expired. Please start a new session.")
                 if profile_data.get('version', '1.0') == '2.0':
                     try:
                         profile_data['name'] = self.encryptor.decrypt(profile_data['name'])
                 time.sleep(0.5 * (attempt + 1))
     def list_profiles(self, session_token: str = None) -> List[str]:
         if session_token:
             profiles = list(self.profiles_dir.glob(f"*{session_token}_profile.json"))
         else:
         return profile_names
     def delete_profile(self, name: str, session_token: str = None) -> bool:
         try:
             profile_file = self.get_profile_path(name)
             if not profile_file.exists():
                 return False
             with open(profile_file, "r", encoding='utf-8') as f:
                 data = json.load(f)
                 if session_token and data.get('session_token') != session_token:
                     return False
             profile_file.unlink()
             if HF_TOKEN and hf_api:
                 try:
                     hf_api.delete_file(
             logger.error(f"Error deleting profile: {str(e)}")
             return False
+# Initialize profile manager
 profile_manager = EnhancedProfileManager()
+# ========== TEACHING ASSISTANT ==========
 class EnhancedTeachingAssistant:
     def __init__(self):
         self.context_history = []
         self.last_model_load_attempt = 0
     async def initialize_model(self):
         if not self.model or not self.tokenizer:
+            if time.time() - self.last_model_load_attempt > 3600:
                 self.model, self.tokenizer = get_model_and_tokenizer()
                 self.last_model_load_attempt = time.time()
     async def generate_response(self, message: str, history: List[List[Union[str, None]]], session_token: str) -> str:
         try:
             await self.initialize_model()
             self._update_context(message, history)
             student_name = profile.get('name', 'Student')
             gpa = profile.get('transcript', {}).get('student_info', {}).get('weighted_gpa', None)
             learning_style = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*",
                                       profile.get('learning_style', ''))
             learning_style = learning_style.group(1) if learning_style else None
             context = f"You are an AI teaching assistant helping {student_name}. "
             if gpa:
                 context += f"{student_name}'s current weighted GPA is {gpa}. "
             if learning_style:
                 context += f"They are a {learning_style.lower()} learner. "
             if self.context_history:
                 context += "Recent conversation:\n"
                 for item in self.context_history[-self.max_context_length:]:
                     role = "Student" if item['role'] == 'user' else "Assistant"
                     context += f"{role}: {item['content']}\n"
             query_type = self._classify_query(message)
             response = await self._generate_typed_response(query_type, message, context, profile)
             return "I encountered an error processing your request. Please try again."
     def _classify_query(self, message: str) -> str:
         message_lower = message.lower()
         if any(word in message_lower for word in ['gpa', 'grade', 'average']):
             return 'general'
     async def _generate_typed_response(self, query_type: str, message: str, context: str, profile: Dict) -> str:
         if query_type == 'gpa':
             return self._generate_gpa_response(profile)
         elif query_type == 'study':
             return await self._generate_general_response(message, context)
     def _generate_gpa_response(self, profile: Dict) -> str:
         gpa = profile.get('transcript', {}).get('student_info', {}).get('weighted_gpa', None)
         if not gpa:
             return "I couldn't find your GPA information. Please upload your transcript first."
         return "\n\n".join(response)
     def _generate_study_response(self, profile: Dict) -> str:
         learning_style_match = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*",
                                        profile.get('learning_style', ''))
         if not learning_style_match:
         if study_plan.get('study_strategies'):
             response.extend([f"- {strategy}" for strategy in study_plan['study_strategies']])
         else:
             if learning_style.lower() == 'visual':
                 response.extend([
                     "- Use color coding in your notes",
         return "\n\n".join(response)
     def _generate_courses_response(self, profile: Dict) -> str:
         transcript = profile.get('transcript', {})
         if not transcript.get('course_history'):
             return "I couldn't find your course information. Please upload your transcript first."
         current_courses = [
             course for course in transcript['course_history']
             if course.get('status', '').lower() == 'in progress'
         ]
         completed_courses = [
             course for course in transcript['course_history']
             if course.get('status', '').lower() == 'completed'
         if current_courses:
             response.append("**Your Current Courses:**")
+            for course in current_courses[:5]:
                 response.append(
                     f"- {course.get('description', 'Unknown')} "
                     f"({course.get('course_code', '')})"
         if completed_courses:
             response.append("\n**Recently Completed Courses:**")
+            for course in completed_courses[:5]:
                 grade = course.get('grade_earned', '')
                 if grade:
                     response.append(
                 else:
                     response.append(f"- {course.get('description', 'Unknown')}")
         rigor = academic_analyzer.analyze_course_rigor(transcript)
         if rigor['rating']:
             response.append(f"\n**Course Rigor Analysis:** {rigor['rating']}")
         return "\n".join(response)
     def _generate_college_response(self, profile: Dict) -> str:
         recommendations = academic_analyzer.generate_college_recommendations(profile.get('transcript', {}))
         response = ["**College Recommendations Based on Your Profile:**"]
         return "\n".join(response)
     def _generate_planning_response(self, profile: Dict) -> str:
         study_plan = profile.get('study_plan', {})
         response = ["**Study Planning Advice:**"]
             for day, activities in study_plan['weekly_schedule'].items():
                 if activities:
                     response.append(f"\n**{day}:**")
+                    for activity in activities[:2]:
                         response.append(
                             f"- {activity.get('course', 'Course')}: "
                             f"{activity.get('duration', '45-60 minutes')}"
         return "\n".join(response)
     def _generate_resources_response(self, profile: Dict) -> str:
         study_plan = profile.get('study_plan', {})
         transcript = profile.get('transcript', {})
         response = ["**Recommended Learning Resources:**"]
         if study_plan.get('resource_recommendations'):
             response.extend([f"- {resource}" for resource in study_plan['resource_recommendations'][:3]])
         else:
             response.extend([
+                "- Khan Academy for math and science",
+                "- Quizlet for flashcards",
                 "- Wolfram Alpha for math help"
             ])
         current_courses = [
             course for course in transcript.get('course_history', [])
             if course.get('status', '').lower() == 'in progress'
         if current_courses:
             response.append("\n**Course-Specific Resources:**")
+            for course in current_courses[:2]:
                 course_name = course.get('description', 'your course')
                 if 'MATH' in course_name.upper():
                     response.append(f"- For {course_name}: Desmos Graphing Calculator, Art of Problem Solving")
         return "\n".join(response)
     async def _generate_general_response(self, message: str, context: str) -> str:
         if not self.model or not self.tokenizer:
             return "I'm still loading my knowledge base. Please try again in a moment."
             inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
             outputs = self.model.generate(
                 **inputs,
                 max_new_tokens=200,
             response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
             response = response[len(prompt):].strip()
             if response and response[-1] not in {'.', '!', '?'}:
                 last_period = response.rfind('.')
                 if last_period > 0:
             return "I encountered an error generating a response. Please try again."
     def _update_context(self, message: str, history: List[List[Union[str, None]]]) -> None:
         self.context_history.append({"role": "user", "content": message})
         if history:
                 if h[1]:
                     self.context_history.append({"role": "assistant", "content": h[1]})
         self.context_history = self.context_history[-(self.max_context_length * 2):]
+# Initialize teaching assistant
 teaching_assistant = EnhancedTeachingAssistant()
+# ========== UTILITY FUNCTIONS ==========
class DataEncryptor:
    """Encrypt and decrypt text with a Fernet cipher built from a string key."""

    def __init__(self, key: str):
        """Create the cipher.

        Args:
            key: Fernet key as text; it is encoded to bytes before being
                handed to ``Fernet``.
        """
        key_bytes = key.encode()
        self.cipher = Fernet(key_bytes)

    def encrypt(self, data: str) -> str:
        """Return the Fernet token for ``data`` as text."""
        token = self.cipher.encrypt(data.encode())
        return token.decode()

    def decrypt(self, encrypted_data: str) -> str:
        """Return the plaintext for a token previously produced by ``encrypt``."""
        plaintext = self.cipher.decrypt(encrypted_data.encode())
        return plaintext.decode()


# Module-level instance built from the configured ENCRYPTION_KEY.
# NOTE(review): ENCRYPTION_KEY falls back to a freshly generated key when the
# environment variable is unset, so data encrypted in one process would not
# decrypt in the next -- confirm the variable is set in deployment.
encryptor = DataEncryptor(ENCRYPTION_KEY)
def generate_session_token() -> str:
    """Return a random token of SESSION_TOKEN_LENGTH ASCII letters and digits.

    Characters are drawn with ``secrets.choice``.
    """
    charset = string.ascii_letters + string.digits
    picked = [secrets.choice(charset) for _ in range(SESSION_TOKEN_LENGTH)]
    return ''.join(picked)
def sanitize_input(text: str) -> str:
    """Strip markup and disallowed characters from user-supplied text.

    The steps run in this order on purpose:

    1. Remove anything that looks like an HTML tag (``<...>``).
    2. Drop every character outside the whitelist (word characters,
       whitespace and ``-.,!?@#$%^&*()+=``).
    3. Trim surrounding whitespace and HTML-escape what is left.

    Escaping has to come last: escaping first turns ``<`` into ``&lt;``, which
    makes the tag-stripping step a no-op and lets the whitelist chop the ``;``
    off every entity, producing broken fragments such as ``&ltb&gt``.

    Args:
        text: Raw input; ``None`` or an empty string is accepted.

    Returns:
        The sanitized text, or ``""`` when ``text`` is empty or ``None``.
    """
    if not text:
        return ""
    cleaned = re.sub(r'<[^>]*>', '', text)
    cleaned = re.sub(r'[^\w\s\-.,!?@#\$%^&*()+=]', '', cleaned)
    # After the whitelist only ``&`` can still be HTML-significant; escape it
    # so the result is safe to embed in markup.
    return html.escape(cleaned.strip())
def validate_name(name: str) -> str:
    """Validate a person's name and return it with surrounding whitespace removed.

    Args:
        name: The name as entered; ``None`` is treated like an empty string so
            a missing value produces the same validation error instead of an
            ``AttributeError``.

    Returns:
        The stripped name.

    Raises:
        ValueError: If the name is empty, longer than 100 characters, or
            contains any digit.
    """
    name = (name or "").strip()
    if not name:
        raise ValueError("Name cannot be empty.")
    if len(name) > 100:
        raise ValueError("Name is too long (maximum 100 characters).")
    if any(c.isdigit() for c in name):
        raise ValueError("Name cannot contain numbers.")
    return name
def validate_age(age: Union[int, float, str]) -> int:
    """Convert ``age`` to an integer and check it lies in [MIN_AGE, MAX_AGE].

    Only the conversion is wrapped in ``try``: the range error is also a
    ``ValueError``, so raising it inside the ``try`` would let the ``except``
    clause replace it with the generic "valid age number" message.

    Args:
        age: Age as a number or numeric string. Floats are truncated by
            ``int()``.

    Returns:
        The age as an ``int``.

    Raises:
        ValueError: "Please enter a valid age number." when ``age`` cannot be
            converted (including ``None``, NaN and infinity), or
            "Age must be between ..." when it is out of range.
    """
    try:
        age_int = int(age)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers float infinity; ``from None`` hides the
        # low-level conversion traceback from the user-facing error.
        raise ValueError("Please enter a valid age number.") from None
    if not MIN_AGE <= age_int <= MAX_AGE:
        raise ValueError(f"Age must be between {MIN_AGE} and {MAX_AGE}.")
    return age_int
def validate_file(file_obj) -> None:
    """Check that an upload exists, has an allowed extension and is small enough.

    Args:
        file_obj: Either an object exposing the on-disk path as ``.name`` or
            the path itself (``str`` / ``os.PathLike``).

    Raises:
        ValueError: If nothing was uploaded, the extension is not in
            ALLOWED_FILE_TYPES, the file cannot be read from disk, or it is
            larger than MAX_FILE_SIZE_MB.
    """
    if not file_obj:
        raise ValueError("Please upload a file first")
    # Resolve real paths explicitly: pathlib's ``.name`` is only the final
    # path component, so it must not be mistaken for the upload's location.
    if isinstance(file_obj, (str, os.PathLike)):
        path = os.fspath(file_obj)
    else:
        path = file_obj.name
    file_ext = os.path.splitext(path)[1].lower()
    if file_ext not in ALLOWED_FILE_TYPES:
        raise ValueError(f"Invalid file type. Allowed types: {', '.join(ALLOWED_FILE_TYPES)}")
    try:
        file_size = os.path.getsize(path) / (1024 * 1024)
    except OSError as err:
        # Report a vanished or unreadable upload as a validation failure, like
        # every other problem this function detects.
        raise ValueError("Could not read the uploaded file. Please upload it again.") from err
    if file_size > MAX_FILE_SIZE_MB:
        raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
# Redaction rules, applied in order. Compiled once at import time instead of
# on every call.
_SENSITIVE_PATTERNS = tuple(
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        (r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED-SSN]'),
        (r'\b\d{6,9}\b', '[ID]'),
        # TLD class is letters only; the former ``[A-Z|a-z]`` also accepted a
        # literal ``|``.
        (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '[EMAIL]'),
        (r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', '[IP]'),
        # Consume the opening parenthesis too, so "(555) 123-4567" does not
        # leave a stray "(" in front of the placeholder.
        (r'\(?\b\d{3}\) \d{3}-\d{4}\b', '[PHONE]'),
        # Addresses must be handled before names: the name rule would rewrite
        # the street name and the address rule could then never match.
        (r'\b\d{1,5} [A-Z][a-z]+ [A-Z][a-z]+, [A-Z]{2} \d{5}\b', '[ADDRESS]'),
        # NOTE(review): this matches ANY two consecutive capitalised words,
        # not only personal names -- confirm that is acceptable for the text
        # this is applied to.
        (r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', '[NAME]'),
    )
)


def remove_sensitive_info(text: str) -> str:
    """Replace sensitive-looking substrings of ``text`` with placeholders.

    Applies the rules in ``_SENSITIVE_PATTERNS`` in order: SSN-shaped numbers,
    6-9 digit numbers, e-mail addresses, dotted-quad IPs, ``(ddd) ddd-dddd``
    phone numbers, ``<number> <Word> <Word>, <ST> <zip>`` addresses and pairs
    of capitalised words.

    Args:
        text: Text to redact; ``None`` or an empty string is accepted.

    Returns:
        The redacted text, or ``""`` when ``text`` is empty or ``None``.
    """
    if not text:
        return ""
    for pattern, replacement in _SENSITIVE_PATTERNS:
        text = pattern.sub(replacement, text)
    return text
+# ========== GRADIO INTERFACE ==========
 def create_enhanced_interface():
     with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
         session_token = gr.State(value=generate_session_token())
         profile_manager.set_session(session_token.value)
         tab_completed = gr.State({
+            0: False,
+            1: False,
+            2: False,
+            3: False,
+            4: False,
+            5: False
         })
         app.css = """
         .gradio-container {
             max-width: 1200px !important;
             border-left: 4px solid #2196F3;
         }
         .dark .tab-content {
             background-color: #2d2d2d !important;
             border-color: #444 !important;
         }
         """
         with gr.Row():
             with gr.Column(scale=4):
                 gr.Markdown("""
             with gr.Column(scale=1):
                 dark_mode = gr.Checkbox(label="Dark Mode", value=False)
         with gr.Row():
             with gr.Column(scale=1, min_width=100):
                 step1 = gr.Button("📄 1. Transcript", elem_classes="incomplete-tab")
         nav_message = gr.HTML(visible=False)
         with gr.Tabs(visible=True) as tabs:
             with gr.Tab("Transcript", id=0):
                 with gr.Row():
                     with gr.Column(scale=1):
                 def process_and_visualize(file_obj, tab_status):
                     try:
+                        parsed_data = transcript_parser.parse_transcript(file_obj.name)
                         gpa_analysis = academic_analyzer.analyze_gpa(parsed_data)
                         grad_status = academic_analyzer.analyze_graduation_status(parsed_data)
                         college_recs = academic_analyzer.generate_college_recommendations(parsed_data)
                         results = [
                             f"## 📊 GPA Analysis",
                             f"**Rating:** {gpa_analysis['rating']}",
                             results.append("\n**Improvement Tips:**")
                             results.extend([f"- {tip}" for tip in gpa_analysis['improvement_tips']])
                         viz_updates = [
                             gr.update(visible=data_visualizer.create_gpa_visualization(parsed_data) is not None),
                             gr.update(visible=data_visualizer.create_requirements_visualization(parsed_data) is not None),
                             gr.update(visible=data_visualizer.create_course_rigor_visualization(parsed_data) is not None)
                         ]
                         tab_status[0] = True
                         return "\n".join(results), parsed_data, *viz_updates, tab_status
                     outputs=step2
                 )
             with gr.Tab("Learning Style Quiz", id=1):
                 with gr.Column():
                     gr.Markdown("### 📝 Step 2: Discover Your Learning Style")
                     outputs=progress
                 )
             with gr.Tab("Personal Profile", id=2):
                 with gr.Row():
                     with gr.Column(scale=1):
                     outputs=[tab_completed, step3, step4, save_confirmation]
                 )
             with gr.Tab("Save Profile", id=3):
                 with gr.Row():
                     with gr.Column(scale=1):
                     ]
                 )
             with gr.Tab("AI Assistant", id=4):
                 gr.Markdown("## 💬 Your Personalized Learning Assistant")
                 gr.Markdown("Ask me anything about studying, your courses, grades, or learning strategies.")
                 chatbot = gr.Chatbot(height=500)
                 msg = gr.Textbox(label="Your Message")
                 clear = gr.Button("Clear")
                 msg.submit(respond, [msg, chatbot], [msg, chatbot])
                 clear.click(lambda: None, None, chatbot, queue=False)
             with gr.Tab("Goals & Planning", id=5):
                 with gr.Row():
                     with gr.Column(scale=1):
                         calendar_output = gr.HTML()
                         calendar_viz = gr.Plot(label="Calendar Visualization", visible=False)
                 goal_type.change(
                     fn=lambda gt: gr.update(visible=gt in ["GPA Improvement", "Test Score"]),
                     inputs=goal_type,
                     calendar = study_calendar.generate_study_calendar(profile, start_date.isoformat())
                     calendar_html = []
                     current_date = datetime.date.fromisoformat(calendar['start_date'])
                     end_date = datetime.date.fromisoformat(calendar['end_date'])
                         gr.update(visible=study_calendar.create_calendar_visualization(calendar) is not None)
                     )
                 add_goal_btn.click(
                     fn=lambda gt, desc, date, val: (
                         goal_tracker.add_goal(name.value, gt, desc, date, val),
                     outputs=[goals_output, goal_viz]
                 )
                 generate_calendar_btn.click(
                     fn=lambda date: update_calendar_display(name.value, date),
                     inputs=calendar_start_date,
                     outputs=[calendar_output, calendar_viz]
                 )
         def navigate_to_tab(tab_index: int, tab_completed_status: dict):
             for i in range(tab_index):
                 if not tab_completed_status.get(i, False):
                     messages = [
                         "Please complete the previous steps first"
                     ]
                     return (
+                        gr.Tabs(selected=i),
                         gr.update(
                             value=f"<div class='error-message'>⛔ {messages[i]}</div>",
                             visible=True
             outputs=[tabs, nav_message]
         )
         def toggle_dark_mode(dark):
             return gr.themes.Soft(primary_hue="blue", secondary_hue="gray") if not dark else gr.themes.Soft(primary_hue="blue", secondary_hue="gray", neutral_hue="slate")
             outputs=None
         )
         app.load(fn=lambda: model_loader.load_model(), outputs=[])
     return app