diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -28,45 +28,53 @@ import plotly.express as px import pdfplumber from io import BytesIO import base64 +import datetime +from cryptography.fernet import Fernet +import calendar +from dateutil.relativedelta import relativedelta +import numpy as np -# Handle matplotlib import with fallback -try: - import matplotlib.pyplot as plt - MATPLOTLIB_AVAILABLE = True -except ImportError: - MATPLOTLIB_AVAILABLE = False - plt = None - logging.warning("Matplotlib not available - some visualizations will be disabled") - -# ========== CONFIGURATION ========== +# Enhanced Configuration PROFILES_DIR = "student_profiles" ALLOWED_FILE_TYPES = [".pdf", ".png", ".jpg", ".jpeg"] -MAX_FILE_SIZE_MB = 5 +MAX_FILE_SIZE_MB = 10 # Increased from 5MB MIN_AGE = 5 MAX_AGE = 120 SESSION_TOKEN_LENGTH = 32 HF_TOKEN = os.getenv("HF_TOKEN") -SESSION_TIMEOUT = 3600 # 1 hour session timeout +SESSION_TIMEOUT = 3600 * 3 # 3 hour session timeout +ENCRYPTION_KEY = os.getenv("ENCRYPTION_KEY", Fernet.generate_key().decode()) +MAX_CONTEXT_HISTORY = 10 +MAX_PROFILE_LOAD_ATTEMPTS = 3 -# Initialize logging +# Initialize logging with enhanced configuration logging.basicConfig( - level=logging.DEBUG, + level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', - filename='transcript_parser.log' + handlers=[ + logging.FileHandler('transcript_parser.log'), + logging.StreamHandler() + ] ) +logger = logging.getLogger(__name__) -# Model configuration - Using smaller model -MODEL_NAME = "deepseek-ai/deepseek-llm-1.3b" +# Model configuration - Using more capable model +MODEL_NAME = "deepseek-ai/deepseek-llm-7b" # Upgraded from 1.3b to 7b -# Initialize Hugging Face API +# Initialize Hugging Face API with retry logic if HF_TOKEN: - try: - hf_api = HfApi(token=HF_TOKEN) - HfFolder.save_token(HF_TOKEN) - except Exception as e: - logging.error(f"Failed to initialize Hugging Face API: {str(e)}") + hf_api = None + for attempt in range(3): + try: + hf_api = HfApi(token=HF_TOKEN) + HfFolder.save_token(HF_TOKEN) + logger.info("Hugging Face API initialized successfully") + break + except Exception as e: + logger.error(f"Attempt {attempt + 1} failed to initialize Hugging Face API: {str(e)}") + time.sleep(2 ** attempt) # Exponential backoff -# ========== MODEL LOADER ========== +# ========== ENHANCED MODEL LOADER ========== class ModelLoader: def __init__(self): self.model = None @@ -75,68 +83,106 @@ class ModelLoader: self.loading = False self.error = None self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.load_attempts = 0 + self.max_retries = 3 def load_model(self, progress: gr.Progress = None) -> Tuple[Optional[AutoModelForCausalLM], Optional[AutoTokenizer]]: - """Lazy load the model with progress feedback""" + """Enhanced lazy load the model with progress feedback and retry logic""" if self.loaded: return self.model, self.tokenizer if self.loading: - while self.loading: - time.sleep(0.1) + while self.loading and self.load_attempts < self.max_retries: + time.sleep(0.5) return self.model, self.tokenizer self.loading = True + self.load_attempts += 1 + try: if progress: - progress(0.1, desc="Checking GPU availability...") + progress(0.1, desc="Initializing model environment...") - torch.cuda.empty_cache() + # Clear GPU cache more aggressively + if self.device == "cuda": + torch.cuda.empty_cache() + torch.cuda.reset_peak_memory_stats() if progress: progress(0.2, desc="Loading tokenizer...") - tokenizer = AutoTokenizer.from_pretrained( - MODEL_NAME, - trust_remote_code=True - ) + # Tokenizer with more error handling + tokenizer = None + for attempt in range(3): + try: + tokenizer = AutoTokenizer.from_pretrained( + MODEL_NAME, + trust_remote_code=True, + use_fast=True + ) + break + except Exception as e: + if attempt == 2: + raise + logger.warning(f"Tokenizer loading attempt {attempt + 1} failed: {str(e)}") + time.sleep(2 ** attempt) if progress: progress(0.5, desc="Loading model (this may take a few minutes)...") + # Model configuration with fallbacks model_kwargs = { "trust_remote_code": True, "torch_dtype": torch.float16 if self.device == "cuda" else torch.float32, "device_map": "auto" if self.device == "cuda" else None, "low_cpu_mem_usage": True, - "offload_folder": "offload" - } + "offload_folder": "offload", + "max_memory": {i: "20GiB" for i in range(torch.cuda.device_count())} if torch.cuda.device_count() > 1 else {} - try: - model = AutoModelForCausalLM.from_pretrained( - MODEL_NAME, - **model_kwargs - ) - except torch.cuda.OutOfMemoryError: - model_kwargs["device_map"] = None - model = AutoModelForCausalLM.from_pretrained( - MODEL_NAME, - **model_kwargs - ).to('cpu') - self.device = 'cpu' - + model = None + for attempt in range(3): + try: + model = AutoModelForCausalLM.from_pretrained( + MODEL_NAME, + **model_kwargs + ) + break + except torch.cuda.OutOfMemoryError: + logger.warning("CUDA OOM encountered, trying CPU offloading") + model_kwargs["device_map"] = None + model = AutoModelForCausalLM.from_pretrained( + MODEL_NAME, + **model_kwargs + ).to('cpu') + self.device = 'cpu' + break + except Exception as e: + if attempt == 2: + raise + logger.warning(f"Model loading attempt {attempt + 1} failed: {str(e)}") + time.sleep(2 ** attempt) + + # Test inference + if progress: + progress(0.8, desc="Verifying model...") test_input = tokenizer("Test", return_tensors="pt").to(self.device) - _ = model.generate(**test_input, max_new_tokens=1) + with torch.no_grad(): + _ = model.generate(**test_input, max_new_tokens=1) self.model = model.eval() self.tokenizer = tokenizer self.loaded = True + logger.info("Model loaded successfully") return model, tokenizer except Exception as e: - self.error = f"Model loading failed: {str(e)}" - logging.error(self.error) + self.error = f"Model loading failed after {self.load_attempts} attempts: {str(e)}" + logger.error(self.error) + if self.load_attempts < self.max_retries: + logger.info(f"Retrying model loading ({self.load_attempts}/{self.max_retries})") + time.sleep(5) + return self.load_model(progress) return None, None finally: self.loading = False @@ -148,7 +194,19 @@ model_loader = ModelLoader() def get_model_and_tokenizer(): return model_loader.load_model() -# ========== UTILITY FUNCTIONS ========== +# ========== ENHANCED UTILITY FUNCTIONS ========== +class DataEncryptor: + def __init__(self, key: str): + self.cipher = Fernet(key.encode()) + + def encrypt(self, data: str) -> str: + return self.cipher.encrypt(data.encode()).decode() + + def decrypt(self, encrypted_data: str) -> str: + return self.cipher.decrypt(encrypted_data.encode()).decode() + +encryptor = DataEncryptor(ENCRYPTION_KEY) + def generate_session_token() -> str: alphabet = string.ascii_letters + string.digits return ''.join(secrets.choice(alphabet) for _ in range(SESSION_TOKEN_LENGTH)) @@ -192,387 +250,253 @@ def validate_file(file_obj) -> None: if file_size > MAX_FILE_SIZE_MB: raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.") -# ========== ENHANCED PDF PARSING ========== -def parse_transcript_pdf(file_path: str): - """Parse the PDF transcript and extract structured data using pdfplumber""" - student_info = {} - requirements = [] - courses = [] - - with pdfplumber.open(file_path) as pdf: - for page in pdf.pages: - text = page.extract_text() - tables = page.extract_tables() - - # Parse student information from the first table - if not student_info and len(tables) > 0: - header_row = tables[0][0] - if "Graduation Progress Summary" in header_row[0]: - student_info = { - 'name': tables[0][1][0].split('-')[-1].strip(), - 'id': tables[0][1][0].split('-')[0].strip(), - 'school': tables[0][0][0].split('|')[1].strip(), - 'cohort': tables[0][0][1].replace('Cohort', '').strip(), - 'grade': tables[0][2][0].replace('Current Grade:', '').strip(), - 'grad_year': tables[0][2][1].replace('YOG', '').strip(), - 'gpa_weighted': tables[0][2][2].replace('Weighted GPA', '').strip(), - 'gpa_unweighted': tables[0][0][2].replace('Un-weighted GPA', '').strip(), - 'service_hours': tables[0][0][3].replace('Comm Serv Hours', '').strip(), - 'service_date': tables[0][2][3].replace('Comm Serv Date', '').strip(), - 'total_credits': tables[0][2][4].replace('Total Credits Earned', '').strip(), - 'virtual_grade': tables[0][0][4].replace('Virtual Grade', '').strip() - } - - # Parse requirements table - if len(tables) > 1 and "Code" in tables[1][0][0]: - for row in tables[1][1:]: - if len(row) >= 6 and row[0] and row[0] != 'Total': - requirements.append({ - 'code': row[0], - 'desc': row[1], - 'required': float(row[2]) if row[2] else 0, - 'waived': float(row[3]) if row[3] else 0, - 'completed': float(row[4]) if row[4] else 0, - 'status': float(row[5].replace('%', '')) if row[5] and '%' in row[5] else 0 - }) - - # Parse course history table - if len(tables) > 2 and "Requirement" in tables[2][0][0]: - for row in tables[2][1:]: - if len(row) >= 10 and row[0]: - courses.append({ - 'requirement': row[0], - 'year': row[1], - 'grade': row[2], - 'course_code': row[3], - 'course_name': row[4], - 'term': row[5], - 'district_num': row[6], - 'grade_earned': row[7], - 'included': row[8], - 'credits': float(row[9]) if row[9] and row[9] not in ['inProgress', ''] else 0, - 'status': 'Completed' if row[9] and row[9] != 'inProgress' else 'In Progress' - }) +def remove_sensitive_info(text: str) -> str: + """Enhanced PII removal with more patterns""" + patterns = [ + (r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED-SSN]'), + (r'\b\d{6,9}\b', '[ID]'), + (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', '[EMAIL]'), + (r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', '[IP]'), + (r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', '[NAME]'), # Simple name pattern + (r'\b\d{3}\) \d{3}-\d{4}\b', '[PHONE]'), + (r'\b\d{1,5} [A-Z][a-z]+ [A-Z][a-z]+, [A-Z]{2} \d{5}\b', '[ADDRESS]') + ] - return student_info, requirements, courses + for pattern, replacement in patterns: + text = re.sub(pattern, replacement, text) + return text -def analyze_college_readiness(student_info, requirements, courses): - """Analyze the student's profile for college readiness""" - analysis = { - 'gpa_rating': '', - 'rigor_rating': '', - 'service_rating': '', - 'recommendations': [] - } - - # GPA Analysis - weighted_gpa = float(student_info.get('gpa_weighted', 0)) - if weighted_gpa >= 4.5: - analysis['gpa_rating'] = 'Excellent (Highly Competitive)' - elif weighted_gpa >= 3.8: - analysis['gpa_rating'] = 'Strong (Competitive)' - elif weighted_gpa >= 3.0: - analysis['gpa_rating'] = 'Good' - else: - analysis['gpa_rating'] = 'Below Average' - - # Course Rigor Analysis - ap_count = sum(1 for course in courses if 'AP' in course['course_name']) - de_count = sum(1 for course in courses if 'DE' in course['course_name']) - honors_count = sum(1 for course in courses if 'Honors' in course['course_name']) - - total_rigorous = ap_count + de_count + honors_count - if total_rigorous >= 10: - analysis['rigor_rating'] = 'Very High' - elif total_rigorous >= 6: - analysis['rigor_rating'] = 'High' - elif total_rigorous >= 3: - analysis['rigor_rating'] = 'Moderate' - else: - analysis['rigor_rating'] = 'Low' - - # Community Service Analysis - service_hours = int(student_info.get('service_hours', 0)) - if service_hours >= 100: - analysis['service_rating'] = 'Exceptional' - elif service_hours >= 50: - analysis['service_rating'] = 'Strong' - elif service_hours >= 30: - analysis['service_rating'] = 'Adequate' - else: - analysis['service_rating'] = 'Limited' - - # Generate recommendations - if weighted_gpa < 3.5 and ap_count < 3: - analysis['recommendations'].append("Consider taking more advanced courses (AP/DE) to strengthen your academic profile") - if service_hours < 50: - analysis['recommendations'].append("Additional community service hours could enhance your college applications") +# ========== ENHANCED PDF PARSING ========== +class EnhancedTranscriptParser: + def __init__(self): + self.common_school_patterns = { + 'miami_dade': r'(MIAMI-DADE|DADE COUNTY|MDCPS)', + 'broward': r'(BROWARD COUNTY|BCPS)', + 'florida': r'(FLORIDA|FDOE|FL DOE)' + } + self.transcript_templates = { + 'miami_dade': self._parse_miami_dade_transcript, + 'broward': self._parse_broward_transcript, + 'florida': self._parse_florida_standard_transcript, + 'default': self._parse_generic_transcript + } - return analysis - -def create_requirements_visualization_matplotlib(requirements): - """Create matplotlib visualization for requirements completion""" - if not MATPLOTLIB_AVAILABLE or not requirements: - return None + def detect_transcript_type(self, text: str) -> str: + """Detect the transcript format based on patterns""" + text = text.upper() + for template, pattern in self.common_school_patterns.items(): + if re.search(pattern, text): + return template + return 'default' - try: - fig, ax = plt.subplots(figsize=(10, 6)) - req_names = [req['code'] for req in requirements] - req_completion = [min(req['status'], 100) for req in requirements] - colors = ['#4CAF50' if x >= 100 else '#FFC107' if x > 0 else '#F44336' for x in req_completion] - - bars = ax.barh(req_names, req_completion, color=colors) - ax.set_xlabel('Completion (%)') - ax.set_title('Requirement Completion Status') - ax.set_xlim(0, 100) - - # Add value labels - for bar in bars: - width = bar.get_width() - ax.text(width + 1, bar.get_y() + bar.get_height()/2, - f'{width:.1f}%', - ha='left', va='center') - - plt.tight_layout() - return fig - except Exception as e: - logging.error(f"Error creating matplotlib visualization: {str(e)}") - return None - -def create_credits_distribution_visualization(requirements): - """Create pie chart for credits distribution""" - if not MATPLOTLIB_AVAILABLE or not requirements: - return None + def parse_transcript(self, file_path: str, file_ext: str) -> Dict: + """Enhanced parsing with format detection and fallbacks""" + try: + # First extract text with appropriate method + text = self.extract_text_from_file(file_path, file_ext) + if not text.strip(): + raise ValueError("No text could be extracted from file") + + # Detect transcript type + transcript_type = self.detect_transcript_type(text) + logger.info(f"Detected transcript type: {transcript_type}") + + # Try specialized parser first + parser_func = self.transcript_templates.get(transcript_type, self._parse_generic_transcript) + parsed_data = parser_func(text) + + if not parsed_data: + logger.warning(f"Specialized parser failed, trying generic parser") + parsed_data = self._parse_generic_transcript(text) + + if not parsed_data: + raise ValueError("No data could be parsed from transcript") + + # Validate and enhance parsed data + self.validate_parsed_data(parsed_data) + self.enhance_parsed_data(parsed_data) + + return parsed_data + + except Exception as e: + logger.error(f"Error parsing transcript: {str(e)}") + raise ValueError(f"Couldn't parse transcript content. Error: {str(e)}") - try: - fig, ax = plt.subplots(figsize=(8, 8)) - - core_credits = sum(req['completed'] for req in requirements if req['code'] in ['A-English', 'B-Math', 'C-Science', 'D-Social']) - elective_credits = sum(req['completed'] for req in requirements if req['code'] in ['G-Electives']) - other_credits = sum(req['completed'] for req in requirements if req['code'] in ['E-Arts', 'F-PE']) + def extract_text_from_file(self, file_path: str, file_ext: str) -> str: + """Enhanced text extraction with multiple fallbacks""" + text = "" - credit_values = [core_credits, elective_credits, other_credits] - credit_labels = ['Core Subjects', 'Electives', 'Arts/PE'] - colors = ['#3498db', '#2ecc71', '#9b59b6'] - - ax.pie(credit_values, labels=credit_labels, autopct='%1.1f%%', - colors=colors, startangle=90) - ax.set_title('Credit Distribution') - - plt.tight_layout() - return fig - except Exception as e: - logging.error(f"Error creating credits visualization: {str(e)}") - return None - -# ========== TEXT EXTRACTION FUNCTIONS ========== -def preprocess_text(text: str) -> str: - """Normalize text for more reliable parsing""" - text = re.sub(r'\s+', ' ', text) # Normalize whitespace - text = text.upper() # Standardize case for certain fields - return text - -def extract_text_from_file(file_path: str, file_ext: str) -> str: - text = "" - - try: - if file_ext == '.pdf': - try: - # First try pdfplumber for better table extraction - student_info, requirements, courses = parse_transcript_pdf(file_path) - if student_info: - # Convert parsed data to text format for compatibility - text += f"STUDENT INFORMATION:\n" - text += f"Name: {student_info.get('name', '')}\n" - text += f"ID: {student_info.get('id', '')}\n" - text += f"School: {student_info.get('school', '')}\n" - text += f"Grade: {student_info.get('grade', '')}\n" - text += f"Graduation Year: {student_info.get('grad_year', '')}\n" - text += f"Weighted GPA: {student_info.get('gpa_weighted', '')}\n" - text += f"Unweighted GPA: {student_info.get('gpa_unweighted', '')}\n" - text += f"Service Hours: {student_info.get('service_hours', '')}\n" - text += f"Total Credits: {student_info.get('total_credits', '')}\n\n" - - text += "GRADUATION REQUIREMENTS:\n" - for req in requirements: - text += f"{req['code']} | {req['desc']} | Required: {req['required']} | Completed: {req['completed']} | Status: {req['status']}%\n" - - text += "\nCOURSE HISTORY:\n" - for course in courses: - text += f"{course['course_code']} | {course['course_name']} | Grade: {course['grade_earned']} | Credits: {course['credits']} | Status: {course['status']}\n" - - return text - - # Fall back to regular text extraction if specialized parsing fails - import pdfplumber - with pdfplumber.open(file_path) as pdf: - for page in pdf.pages: - # Try to extract tables first - tables = page.extract_tables({ - "vertical_strategy": "text", - "horizontal_strategy": "text", - "intersection_y_tolerance": 10 - }) - - if tables: - for table in tables: - for row in table: - text += " | ".join(str(cell).strip() for cell in row if cell) + "\n" - - # Fall back to text extraction if tables are empty - page_text = page.extract_text() - if page_text: - text += page_text + "\n" + try: + if file_ext == '.pdf': + # Try pdfplumber first for better table handling + try: + with pdfplumber.open(file_path) as pdf: + for page in pdf.pages: + # Try to extract tables first + tables = page.extract_tables({ + "vertical_strategy": "text", + "horizontal_strategy": "text", + "intersection_y_tolerance": 10, + "join_tolerance": 20 + }) + + if tables: + for table in tables: + for row in table: + text += " | ".join(str(cell).strip() for cell in row if cell) + "\n" - if not text.strip(): - raise ValueError("PDFPlumber returned empty text") + # Fall back to text extraction if tables are empty + page_text = page.extract_text() + if page_text: + text += page_text + "\n" - except Exception as e: - logging.warning(f"PDFPlumber failed: {str(e)}. Trying PyMuPDF...") - doc = fitz.open(file_path) - for page in doc: - text += page.get_text("text") + '\n' + if not text.strip(): + raise ValueError("PDFPlumber returned empty text") - elif file_ext in ['.png', '.jpg', '.jpeg']: - text = extract_text_with_ocr(file_path) + except Exception as e: + logger.warning(f"PDFPlumber failed: {str(e)}. Trying PyMuPDF...") + doc = fitz.open(file_path) + for page in doc: + text += page.get_text("text", flags=fitz.TEXT_PRESERVE_IMAGES) + '\n' + + elif file_ext in ['.png', '.jpg', '.jpeg']: + text = self.extract_text_with_enhanced_ocr(file_path) - text = clean_extracted_text(text) + text = self.clean_extracted_text(text) + + if not text.strip(): + raise ValueError("The file appears to be empty or contains no readable text.") + + return text - if not text.strip(): - raise ValueError("No text could be extracted.") + except Exception as e: + logger.error(f"Text extraction error: {str(e)}") + raise ValueError(f"Failed to extract text: {str(e)}") + + def extract_text_with_enhanced_ocr(self, file_path: str) -> str: + """Enhanced OCR with preprocessing""" + try: + image = Image.open(file_path) - return text + # Preprocessing for better OCR + image = image.convert('L') # Grayscale + image = image.point(lambda x: 0 if x < 140 else 255, '1') # Thresholding + + # Custom config for academic documents + custom_config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-.,:()%$@ ' + + # Try with different page segmentation modes + for psm in [6, 11, 4]: # Try different modes + text = pytesseract.image_to_string(image, config=f"{custom_config} --psm {psm}") + if len(text.strip()) > 50: # If we got reasonable text + break + + return text + except Exception as e: + raise ValueError(f"OCR processing failed: {str(e)}") - except Exception as e: - logging.error(f"Text extraction error: {str(e)}") - raise ValueError(f"Failed to extract text: {str(e)}") - -def extract_text_with_ocr(file_path: str) -> str: - try: - image = Image.open(file_path) - image = image.convert('L') - image = image.point(lambda x: 0 if x < 128 else 255, '1') - custom_config = r'--oem 3 --psm 6' - text = pytesseract.image_to_string(image, config=custom_config) + def clean_extracted_text(self, text: str) -> str: + """Enhanced cleaning for academic transcripts""" + # Normalize whitespace and case + text = re.sub(r'\s+', ' ', text).strip() + + # Fix common OCR errors in academic contexts + replacements = { + 'GradeLv1': 'GradeLvl', + 'CrsNu m': 'CrsNum', + 'YOG': 'Year of Graduation', + 'Comm Serv': 'Community Service', + r'\bA\s*-\s*': 'A-', # Fix requirement codes + r'\bB\s*-\s*': 'B-', + r'\bC\s*-\s*': 'C-', + r'\bD\s*-\s*': 'D-', + r'\bE\s*-\s*': 'E-', + r'\bF\s*-\s*': 'F-', + r'\bG\s*-\s*': 'G-', + r'\bZ\s*-\s*': 'Z-', + 'lnProgress': 'inProgress', + 'lP': 'IP', + 'AP\s': 'AP ', + 'DE\s': 'DE ', + 'Honors\s': 'Honors ', + 'lB': 'IB' + } + + for pattern, replacement in replacements.items(): + text = re.sub(pattern, replacement, text, flags=re.IGNORECASE) + + # Fix course codes with spaces + text = re.sub(r'(\b[A-Z]{2,4})\s(\d{3}[A-Z]?\b)', r'\1\2', text) + return text - except Exception as e: - raise ValueError(f"OCR processing failed: {str(e)}") - -def clean_extracted_text(text: str) -> str: - """Special cleaning for Miami-Dade transcripts""" - # Normalize whitespace - text = re.sub(r'\s+', ' ', text).strip() - # Fix common OCR errors - replacements = { - 'GradeLv1': 'GradeLvl', - 'CrsNu m': 'CrsNum', - 'YOG': 'Year of Graduation', - 'Comm Serv': 'Community Service', - r'\bA\s*-\s*': 'A-', # Fix requirement codes - r'\bB\s*-\s*': 'B-', - r'\bC\s*-\s*': 'C-', - r'\bD\s*-\s*': 'D-', - r'\bE\s*-\s*': 'E-', - r'\bF\s*-\s*': 'F-', - r'\bG\s*-\s*': 'G-', - r'\bZ\s*-\s*': 'Z-' - } - - for pattern, replacement in replacements.items(): - text = re.sub(pattern, replacement, text) - - # Fix course codes with spaces - text = re.sub(r'(\b[A-Z]{2,4})\s(\d{3}[A-Z]?\b)', r'\1\2', text) - - # Fix common OCR errors in credits - text = re.sub(r'in\s*Progress', 'inProgress', text, flags=re.IGNORECASE) - - return text - -def remove_sensitive_info(text: str) -> str: - text = re.sub(r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED]', text) - text = re.sub(r'\b\d{6,9}\b', '[ID]', text) - text = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z9.-]+\.[A-Z|a-z]{2,}\b', '[EMAIL]', text) - return text - -# ========== TRANSCRIPT PARSING ========== -class Course(BaseModel): - requirement: str - school_year: str - grade_level: str - course_code: str - description: str - term: str - district_number: str - fg: str - included: str - credits: str - -class GraduationProgress(BaseModel): - student_name: str - student_id: str - current_grade: str - year_of_graduation: str - unweighted_gpa: float - weighted_gpa: float - community_service_hours: int - community_service_date: str - total_credits_earned: float - virtual_grade: str - requirements: Dict[str, Dict[str, float]]] - courses: List[Course] - assessments: Dict[str, str] - -def validate_parsed_data(parsed_data: Dict) -> bool: - """Ensure all critical fields exist""" - required_fields = [ - ('student_info', 'name'), - ('student_info', 'weighted_gpa'), - ('requirements', 'A-English'), # Sample requirement - ('course_history', 0) # At least one course - ] + def validate_parsed_data(self, parsed_data: Dict) -> bool: + """Enhanced validation with more fields""" + required_fields = [ + ('student_info', 'name'), + ('student_info', 'id'), + ('requirements',), # At least some requirements + ('course_history',) # At least some courses + ] + + for path in required_fields: + current = parsed_data + for key in path: + if key not in current: + raise ValueError(f"Missing critical field: {'.'.join(path)}") + current = current[key] + return True - for path in required_fields: - current = parsed_data - for key in path: - if key not in current: - raise ValueError(f"Missing critical field: {'.'.join(path)}") - current = current[key] - return True - -class TranscriptParser: - def __init__(self): - self.student_data = {} - self.requirements = {} - self.current_courses = [] - self.course_history = [] - self.graduation_status = {} + def enhance_parsed_data(self, parsed_data: Dict) -> Dict: + """Add derived fields and calculations""" + # Calculate total credits if not present + if 'total_credits' not in parsed_data.get('student_info', {}): + try: + total_credits = sum( + float(course.get('credits', 0)) + for course in parsed_data.get('course_history', []) + if course and str(course.get('credits', '0')).replace('.', '').isdigit() + ) + parsed_data['student_info']['total_credits'] = round(total_credits, 2) + except: + pass - def parse_transcript(self, text: str) -> Dict: - """Parse transcript text and return structured data""" - try: - text = preprocess_text(text) - - # First try the specialized Miami-Dade parser - parsed_data = self._parse_miami_dade_transcript(text) - if parsed_data: - validate_parsed_data(parsed_data) - return parsed_data + # Calculate GPA if not present + if 'weighted_gpa' not in parsed_data.get('student_info', {}): + try: + grades = [] + grade_points = { + 'A': 4.0, 'A-': 3.7, 'B+': 3.3, 'B': 3.0, 'B-': 2.7, + 'C+': 2.3, 'C': 2.0, 'C-': 1.7, 'D+': 1.3, 'D': 1.0, 'F': 0.0 + } - # Fall back to simplified parser if detailed parsing fails - parsed_data = self._parse_simplified_transcript(text) - if parsed_data: - return parsed_data + for course in parsed_data.get('course_history', []): + grade = course.get('grade_earned', '').upper() + if grade in grade_points: + grades.append(grade_points[grade]) - raise ValueError("No data could be parsed from the transcript") - - except Exception as e: - logging.error(f"Error parsing transcript: {str(e)}") - raise ValueError(f"Couldn't parse transcript content. Error: {str(e)}") - + if grades: + unweighted_gpa = sum(grades) / len(grades) + parsed_data['student_info']['unweighted_gpa'] = round(unweighted_gpa, 2) + + # Simple weighted GPA calculation (AP/IB/DE courses get +1) + weighted_grades = [] + for course in parsed_data.get('course_history', []): + grade = course.get('grade_earned', '').upper() + if grade in grade_points: + weight = 1.0 if any(x in course.get('course_name', '').upper() + for x in ['AP', 'IB', 'DE', 'HONORS']) else 0.0 + weighted_grades.append(grade_points[grade] + weight) + + if weighted_grades: + parsed_data['student_info']['weighted_gpa'] = round(sum(weighted_grades) / len(weighted_grades), 2) + except: + pass + + return parsed_data + def _parse_miami_dade_transcript(self, text: str) -> Optional[Dict]: - """Specialized parser for Miami-Dade County Public Schools transcripts""" + """Enhanced Miami-Dade parser with better table handling""" try: parsed_data = { 'student_info': {}, @@ -585,24 +509,34 @@ class TranscriptParser: student_info_match = re.search( r"(\d{7})\s*-\s*(.*?)\s*\n.*?Current Grade:\s*(\d+).*?YOG\s*(\d{4})", text, - re.DOTALL + re.DOTALL | re.IGNORECASE ) if student_info_match: - parsed_data['student_info']['id'] = student_info_match.group(1) - parsed_data['student_info']['name'] = student_info_match.group(2).strip() - parsed_data['student_info']['grade'] = student_info_match.group(3) - parsed_data['student_info']['year_of_graduation'] = student_info_match.group(4) - - # Extract GPA information - gpa_matches = re.findall( - r"(?:Un.?weighted|Weighted)\s*GPA\s*([\d.]+)", - text, - re.IGNORECASE - ) - if len(gpa_matches) >= 1: - parsed_data['student_info']['unweighted_gpa'] = float(gpa_matches[0]) - if len(gpa_matches) >= 2: - parsed_data['student_info']['weighted_gpa'] = float(gpa_matches[1]) + parsed_data['student_info'] = { + 'id': student_info_match.group(1), + 'name': student_info_match.group(2).strip(), + 'grade': student_info_match.group(3), + 'year_of_graduation': student_info_match.group(4), + 'district': 'Miami-Dade' + } + + # Extract GPA information with more flexible patterns + gpa_patterns = [ + r"(?:Un.?weighted|Weighted)\s*GPA\s*([\d.]+)", + r"GPA\s*\(.*?\)\s*:\s*([\d.]+)", + r"Grade\s*Point\s*Average\s*:\s*([\d.]+)" + ] + + gpa_values = [] + for pattern in gpa_patterns: + gpa_values.extend(re.findall(pattern, text, re.IGNORECASE)) + if len(gpa_values) >= 2: + break + + if len(gpa_values) >= 1: + parsed_data['student_info']['unweighted_gpa'] = float(gpa_values[0]) + if len(gpa_values) >= 2: + parsed_data['student_info']['weighted_gpa'] = float(gpa_values[1]) # Extract community service info service_hours_match = re.search(r"Comm\s*Serv\s*Hours\s*(\d+)", text, re.IGNORECASE) @@ -623,9 +557,9 @@ class TranscriptParser: if virtual_grade_match: parsed_data['student_info']['virtual_grade'] = virtual_grade_match.group(1) - # Extract requirements section - more robust table parsing + # Enhanced requirements section parsing req_section = re.search( - r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status(.*?)(?:\n\s*\n|$)", + r"(?:Graduation\s*Requirements|Requirements\s*Summary).*?(Code\s*Description.*?)(?:\n\s*\n|$)", text, re.DOTALL | re.IGNORECASE ) @@ -660,24 +594,12 @@ class TranscriptParser: "status": status } except (IndexError, ValueError) as e: - logging.warning(f"Skipping malformed requirement line: {line}. Error: {str(e)}") + logger.warning(f"Skipping malformed requirement line: {line}. Error: {str(e)}") continue - # Extract assessments section - assess_section = re.search(r"Z-Assessment.*?\n(.*?)(?:\n\s*\n|$)", text, re.DOTALL | re.IGNORECASE) - if assess_section: - assess_lines = [line.strip() for line in assess_section.group(1).split('\n') if line.strip()] - for line in assess_lines: - if '|' in line: - parts = [part.strip() for part in line.split('|') if part.strip()] - if len(parts) >= 5 and parts[0].startswith('Z-'): - name = parts[0].replace('Z-', '').strip() - status = parts[4] if len(parts) > 4 else "" - parsed_data['assessments'][name] = status - - # Extract course history with more fault-tolerant parsing + # Enhanced course history parsing course_section = re.search( - r"Requirement.*?School Year.*?GradeLv1.*?CrsNum.*?Description.*?Term.*?DstNumber.*?FG.*?Incl.*?Credits(.*?)(?:Legend|\Z)", + r"(?:Course\s*History|Academic\s*Record).*?(Requirement.*?School Year.*?GradeLv1.*?CrsNum.*?Description.*?Term.*?DstNumber.*?FG.*?Incl.*?Credits.*?)(?:\n\s*\n|$)", text, re.DOTALL | re.IGNORECASE ) @@ -691,7 +613,6 @@ class TranscriptParser: for line in course_lines: parts = [part.strip() for part in line.split('|') if part.strip()] - # More robust handling of course data try: course = { 'requirement': parts[0] if len(parts) > 0 else "", @@ -703,10 +624,11 @@ class TranscriptParser: 'district_number': parts[6] if len(parts) > 6 else "", 'fg': parts[7] if len(parts) > 7 else "", 'included': parts[8] if len(parts) > 8 else "", - 'credits': parts[9] if len(parts) > 9 else "0" + 'credits': parts[9] if len(parts) > 9 else "0", + 'status': 'Completed' if parts[9] and parts[9] != 'inProgress' else 'In Progress' } - # Handle "inProgress" and empty credits + # Handle credits conversion if "inprogress" in course['credits'].lower() or not course['credits']: course['credits'] = "0" elif not course['credits'].replace('.','').isdigit(): @@ -714,500 +636,838 @@ class TranscriptParser: parsed_data['course_history'].append(course) except (IndexError, ValueError) as e: - logging.warning(f"Skipping malformed course line: {line}. Error: {str(e)}") + logger.warning(f"Skipping malformed course line: {line}. Error: {str(e)}") continue return parsed_data except Exception as e: - logging.warning(f"Miami-Dade transcript parsing failed: {str(e)}") + logger.warning(f"Miami-Dade transcript parsing failed: {str(e)}") return None - - def _parse_simplified_transcript(self, text: str) -> Dict: - """Fallback simplified transcript parser with multiple pattern attempts""" - patterns = [ - (r'(?:COURSE|SUBJECT)\s*CODE.*?GRADE.*?CREDITS(.*?)(?:\n\s*\n|\Z)', 'table'), - (r'([A-Z]{2,4}\s?\d{3}[A-Z]?)\s+(.*?)\s+([A-F][+-]?)\s+(\d+\.?\d*)', 'line'), - (r'(.*?)\s+([A-F][+-]?)\s+(\d+\.?\d*)', 'minimal') - ] - - for pattern, pattern_type in patterns: - try: - if pattern_type == 'table': - # Parse tabular data - table_section = re.search(pattern, text, re.DOTALL | re.IGNORECASE) - if table_section: - courses = re.findall(r'([A-Z]{2,4}\s?\d{3}[A-Z]?)\s+(.*?)\s+([A-F][+-]?)\s+(\d+\.?\d*)', - table_section.group(1)) - elif pattern_type == 'line': - courses = re.findall(pattern, text) - else: - courses = re.findall(pattern, text) - + + def _parse_broward_transcript(self, text: str) -> Optional[Dict]: + """Parser for Broward County transcripts""" + try: + parsed_data = { + 'student_info': {}, + 'requirements': {}, + 'course_history': [], + 'assessments': {} + } + + # Broward-specific patterns + student_info_match = re.search( + r"Student:\s*(\d+)\s*-\s*(.*?)\s*Grade:\s*(\d+)", + text, + re.IGNORECASE + ) + if student_info_match: + parsed_data['student_info'] = { + 'id': student_info_match.group(1), + 'name': student_info_match.group(2).strip(), + 'grade': student_info_match.group(3), + 'district': 'Broward' + } + + # Add Broward-specific parsing logic here... + + return parsed_data + except Exception as e: + logger.warning(f"Broward transcript parsing failed: {str(e)}") + return None + + def _parse_florida_standard_transcript(self, text: str) -> Optional[Dict]: + """Parser for Florida standard transcripts""" + try: + parsed_data = { + 'student_info': {}, + 'requirements': {}, + 'course_history': [], + 'assessments': {} + } + + # Florida standard patterns + student_info_match = re.search( + r"Florida\s*Student\s*Transcript.*?Name:\s*(.*?)\s*ID:\s*(\d+)", + text, + re.IGNORECASE | re.DOTALL + ) + if student_info_match: + parsed_data['student_info'] = { + 'name': student_info_match.group(1).strip(), + 'id': student_info_match.group(2), + 'district': 'Florida' + } + + # Add Florida standard parsing logic here... + + return parsed_data + except Exception as e: + logger.warning(f"Florida standard transcript parsing failed: {str(e)}") + return None + + def _parse_generic_transcript(self, text: str) -> Optional[Dict]: + """Fallback parser for generic transcripts""" + try: + parsed_data = { + 'student_info': {}, + 'requirements': {}, + 'course_history': [], + 'assessments': {} + } + + # Try to extract basic student info + name_match = re.search(r"(?:Student|Name):\s*(.*?)\s*(?:\n|ID|$)", text, re.IGNORECASE) + if name_match: + parsed_data['student_info']['name'] = name_match.group(1).strip() + + id_match = re.search(r"(?:ID|Student\s*Number):\s*(\d+)", text, re.IGNORECASE) + if id_match: + parsed_data['student_info']['id'] = id_match.group(1) + + # Try to extract courses + course_patterns = [ + r"([A-Z]{2,4}\d{3}[A-Z]?)\s+(.*?)\s+([A-F][+-]?)\s+(\d+\.?\d*)", # CODE DESC GRADE CREDITS + r"(\d{4}-\d{4})\s+([A-Z]{2,4}\d{3}[A-Z]?)\s+(.*?)\s+([A-F][+-]?)\s+(\d+\.?\d*)", # YEAR CODE DESC GRADE CREDITS + r"(.*?)\s+([A-F][+-]?)\s+(\d+\.?\d*)" # DESC GRADE CREDITS + ] + + for pattern in course_patterns: + courses = re.findall(pattern, text) if courses: - parsed_data = {'course_history': []} for course in courses: - if len(course) >= 4: + if len(course) == 4: parsed_data['course_history'].append({ - 'course_code': course[0].strip(), - 'description': course[1].strip(), - 'grade': course[2].strip(), - 'credits': float(course[3]) if course[3] else 0.0 + 'course_code': course[0], + 'description': course[1], + 'grade': course[2], + 'credits': course[3] + }) + elif len(course) == 5: + parsed_data['course_history'].append({ + 'school_year': course[0], + 'course_code': course[1], + 'description': course[2], + 'grade': course[3], + 'credits': course[4] }) elif len(course) == 3: parsed_data['course_history'].append({ - 'description': course[0].strip(), - 'grade': course[1].strip(), - 'credits': float(course[2]) if course[2] else 0.0 + 'description': course[0], + 'grade': course[1], + 'credits': course[2] }) - return parsed_data - except Exception as e: - logging.warning(f"Pattern {pattern} failed: {str(e)}") - continue - - return None - -# ========== ENHANCED ANALYSIS FUNCTIONS ========== -def analyze_gpa(parsed_data: Dict) -> str: - try: - gpa = float(parsed_data.get('student_info', {}).get('weighted_gpa', 0)) - if gpa >= 4.5: - return "🌟 Excellent GPA! You're in the top tier of students." - elif gpa >= 3.5: - return "šŸ‘ Good GPA! You're performing above average." - elif gpa >= 2.5: - return "ā„¹ļø Average GPA. Consider focusing on improvement in weaker areas." - else: - return "āš ļø Below average GPA. Please consult with your academic advisor." - except (TypeError, ValueError, KeyError, AttributeError): - return "āŒ Could not analyze GPA." + break + + return parsed_data if parsed_data['course_history'] else None + except Exception as e: + logger.warning(f"Generic transcript parsing failed: {str(e)}") + return None -def analyze_graduation_status(parsed_data: Dict) -> str: - try: - total_required = sum( - float(req.get('required', 0)) - for req in parsed_data.get('requirements', {}).values() - if req and str(req.get('required', '0')).replace('.', '').isdigit() - ) - - total_completed = sum( - float(req.get('completed', 0)) - for req in parsed_data.get('requirements', {}).values() - if req and str(req.get('completed', '0')).replace('.', '').isdigit() - ) - - completion_percentage = (total_completed / total_required) * 100 if total_required > 0 else 0 - - if completion_percentage >= 100: - return "šŸŽ‰ You've met all graduation requirements!" - elif completion_percentage >= 80: - return f"āœ… You've completed {completion_percentage:.1f}% of requirements. Almost there!" - elif completion_percentage >= 50: - return f"šŸ”„ You've completed {completion_percentage:.1f}% of requirements. Keep working!" - else: - return f"āš ļø You've only completed {completion_percentage:.1f}% of requirements. Please meet with your counselor." - except (ZeroDivisionError, TypeError, KeyError, AttributeError): - return "āŒ Could not analyze graduation status." +# Initialize enhanced parser +transcript_parser = EnhancedTranscriptParser() -def generate_advice(parsed_data: Dict) -> str: - advice = [] - - # GPA advice - try: - gpa = float(parsed_data.get('student_info', {}).get('weighted_gpa', 0)) - if gpa < 3.0: - advice.append("šŸ“š Your GPA could improve. Consider:\n- Seeking tutoring for challenging subjects\n- Meeting with teachers during office hours\n- Developing better study habits") - except (TypeError, ValueError, KeyError, AttributeError): - pass - - # Community service advice - try: - service_hours = int(parsed_data.get('student_info', {}).get('community_service_hours', 0)) - if service_hours < 100: - advice.append("šŸ¤ Consider more community service:\n- Many colleges value 100+ hours\n- Look for opportunities that align with your interests") - except (TypeError, ValueError, KeyError, AttributeError): - pass +# ========== ENHANCED ANALYSIS FUNCTIONS ========== +class AcademicAnalyzer: + def __init__(self): + self.gpa_scale = { + 'A': 4.0, 'A-': 3.7, 'B+': 3.3, 'B': 3.0, 'B-': 2.7, + 'C+': 2.3, 'C': 2.0, 'C-': 1.7, 'D+': 1.3, 'D': 1.0, 'F': 0.0 + } + self.college_tiers = { + 'ivy_league': {'gpa': 4.3, 'rigor': 8, 'service': 100}, + 'top_tier': {'gpa': 4.0, 'rigor': 6, 'service': 80}, + 'competitive': {'gpa': 3.7, 'rigor': 4, 'service': 60}, + 'good': {'gpa': 3.3, 'rigor': 2, 'service': 40}, + 'average': {'gpa': 2.7, 'rigor': 1, 'service': 20} + } - # Missing requirements advice - try: - missing_reqs = [ - req for code, req in parsed_data.get('requirements', {}).items() - if req and float(req.get('percent_complete', 0)) < 100 and not code.startswith("Z-Assessment") - ] + def analyze_gpa(self, parsed_data: Dict) -> Dict: + """Enhanced GPA analysis with more detailed feedback""" + analysis = { + 'rating': '', + 'description': '', + 'comparison': '', + 'improvement_tips': [] + } - if missing_reqs: - req_list = "\n- ".join([f"{code}: {req.get('description', '')}" for code, req in missing_reqs]) - advice.append(f"šŸŽ“ Focus on completing these requirements:\n- {req_list}") - except (TypeError, ValueError, KeyError, AttributeError): - pass - - # Course rigor advice - try: - ap_count = sum(1 for course in parsed_data.get('course_history', []) - if course and "ADVANCED PLACEMENT" in course.get('description', '').upper()) - if ap_count < 3: - advice.append("🧠 Consider taking more challenging courses:\n- AP/IB courses can strengthen college applications\n- Shows academic rigor to admissions officers") - except (TypeError, KeyError, AttributeError): - pass + try: + weighted_gpa = float(parsed_data.get('student_info', {}).get('weighted_gpa', 0)) + unweighted_gpa = float(parsed_data.get('student_info', {}).get('unweighted_gpa', 0)) + + if weighted_gpa >= 4.5: + analysis['rating'] = 'Excellent' + analysis['description'] = "🌟 You're in the top tier of students with a highly competitive GPA." + analysis['comparison'] = "This puts you in the top 5% of students nationally." + analysis['improvement_tips'] = [ + "Consider taking advanced courses to challenge yourself", + "Look into college-level courses or research opportunities" + ] + elif weighted_gpa >= 4.0: + analysis['rating'] = 'Strong' + analysis['description'] = "šŸ‘ Your GPA is strong and competitive for most colleges." + analysis['comparison'] = "This is above the national average and competitive for many universities." + analysis['improvement_tips'] = [ + "Maintain your current study habits", + "Consider adding 1-2 more challenging courses" + ] + elif weighted_gpa >= 3.5: + analysis['rating'] = 'Good' + analysis['description'] = "ā„¹ļø Your GPA is good but could be improved for more competitive schools." + analysis['comparison'] = "This is slightly above the national average." + analysis['improvement_tips'] = [ + "Focus on improving in your weaker subjects", + "Consider getting tutoring for challenging courses", + "Develop better study habits and time management" + ] + elif weighted_gpa >= 3.0: + analysis['rating'] = 'Average' + analysis['description'] = "āš ļø Your GPA is average. Focus on improvement for better college options." + analysis['comparison'] = "This is around the national average." + analysis['improvement_tips'] = [ + "Identify your weakest subjects and focus on them", + "Develop a consistent study schedule", + "Seek help from teachers or tutors", + "Consider retaking courses with low grades if possible" + ] + else: + analysis['rating'] = 'Below Average' + analysis['description'] = "āŒ Your GPA is below average. Please consult with your academic advisor." + analysis['comparison'] = "This is below the national average and may limit college options." + analysis['improvement_tips'] = [ + "Meet with your school counselor immediately", + "Develop a structured improvement plan", + "Consider summer school or credit recovery options", + "Focus on fundamental study skills" + ] + + # Add comparison between weighted and unweighted + if weighted_gpa > 0 and unweighted_gpa > 0: + diff = weighted_gpa - unweighted_gpa + if diff > 0.5: + analysis['comparison'] += "\n\nThe significant difference between your weighted and unweighted GPA suggests you're taking many advanced courses." + elif diff > 0.2: + analysis['comparison'] += "\n\nThe moderate difference between your weighted and unweighted GPA suggests a good balance of standard and advanced courses." + else: + analysis['comparison'] += "\n\nThe small difference between your weighted and unweighted GPA suggests you might benefit from more challenging courses." + + return analysis + except: + return { + 'rating': 'Unknown', + 'description': 'Could not analyze GPA', + 'comparison': '', + 'improvement_tips': [] + } - return "\n\n".join(advice) if advice else "šŸŽÆ You're on track! Keep up the good work." - -def generate_college_recommendations(parsed_data: Dict) -> str: - try: - gpa = float(parsed_data.get('student_info', {}).get('weighted_gpa', 0)) - ap_count = sum(1 for course in parsed_data.get('course_history', []) - if course and "ADVANCED PLACEMENT" in course.get('description', '').upper()) - service_hours = int(parsed_data.get('student_info', {}).get('community_service_hours', 0)) - - recommendations = [] + def analyze_graduation_status(self, parsed_data: Dict) -> Dict: + """Enhanced graduation analysis with requirement breakdown""" + analysis = { + 'status': '', + 'completion_percentage': 0, + 'missing_requirements': [], + 'on_track': False, + 'timeline': '' + } - if gpa >= 4.0 and ap_count >= 5: - recommendations.append("šŸ›ļø Reach Schools: Ivy League, Stanford, MIT, etc.") - if gpa >= 3.7: - recommendations.append("šŸŽ“ Competitive Schools: Top public universities, selective private colleges") - if gpa >= 3.0: - recommendations.append("šŸ“š Good Match Schools: State flagship universities, many private colleges") - if gpa >= 2.0: - recommendations.append("šŸ« Safety Schools: Community colleges, open admission universities") - - # Add scholarship opportunities - if gpa >= 3.5: - recommendations.append("\nšŸ’° Scholarship Opportunities:\n- Bright Futures (Florida)\n- National Merit Scholarship\n- College-specific merit scholarships") - elif gpa >= 3.0: - recommendations.append("\nšŸ’° Scholarship Opportunities:\n- Local community scholarships\n- Special interest scholarships\n- First-generation student programs") - - # Add extracurricular advice - if service_hours < 50: - recommendations.append("\nšŸŽ­ Extracurricular Advice:\n- Colleges value depth over breadth in activities\n- Consider leadership roles in 1-2 organizations") - - if not recommendations: - return "āŒ Not enough data to generate college recommendations" - - return "Based on your academic profile:\n\n" + "\n\n".join(recommendations) - except: - return "āŒ Could not generate college recommendations" - -def create_gpa_visualization(parsed_data: Dict): - try: - gpa_data = { - "Type": ["Weighted GPA", "Unweighted GPA"], - "Value": [ - float(parsed_data.get('student_info', {}).get('weighted_gpa', 0)), - float(parsed_data.get('student_info', {}).get('unweighted_gpa', 0)) + try: + total_required = sum( + float(req.get('required', 0)) + for req in parsed_data.get('requirements', {}).values() + if req and str(req.get('required', '0')).replace('.', '').isdigit() + ) + + total_completed = sum( + float(req.get('completed', 0)) + for req in parsed_data.get('requirements', {}).values() + if req and str(req.get('completed', '0')).replace('.', '').isdigit() + ) + + analysis['completion_percentage'] = (total_completed / total_required) * 100 if total_required > 0 else 0 + + # Identify missing requirements + analysis['missing_requirements'] = [ + { + 'code': code, + 'description': req.get('description', ''), + 'remaining': float(req.get('required', 0)) - float(req.get('completed', 0)), + 'status': req.get('status', '') + } + for code, req in parsed_data.get('requirements', {}).items() + if req and float(req.get('completed', 0)) < float(req.get('required', 0)) ] + + # Determine status message + current_grade = parsed_data.get('student_info', {}).get('grade', '') + grad_year = parsed_data.get('student_info', {}).get('year_of_graduation', '') + + if analysis['completion_percentage'] >= 100: + analysis['status'] = "šŸŽ‰ Congratulations! You've met all graduation requirements." + analysis['on_track'] = True + elif analysis['completion_percentage'] >= 90: + analysis['status'] = f"āœ… You've completed {analysis['completion_percentage']:.1f}% of requirements. Almost there!" + analysis['on_track'] = True + elif analysis['completion_percentage'] >= 75: + analysis['status'] = f"šŸ”„ You've completed {analysis['completion_percentage']:.1f}% of requirements. Keep working!" + analysis['on_track'] = True + elif analysis['completion_percentage'] >= 50: + analysis['status'] = f"āš ļø You've completed {analysis['completion_percentage']:.1f}% of requirements. Please meet with your counselor." + analysis['on_track'] = False + else: + analysis['status'] = f"āŒ You've only completed {analysis['completion_percentage']:.1f}% of requirements. Immediate action needed." + analysis['on_track'] = False + + # Add timeline projection if possible + if current_grade and grad_year: + remaining_credits = total_required - total_completed + years_remaining = int(grad_year) - datetime.datetime.now().year - int(current_grade) + + if years_remaining > 0: + credits_per_year = remaining_credits / years_remaining + analysis['timeline'] = ( + f"To graduate on time in {grad_year}, you need to complete approximately " + f"{credits_per_year:.1f} credits per year." + ) + + return analysis + except: + return { + 'status': 'Could not analyze graduation status', + 'completion_percentage': 0, + 'missing_requirements': [], + 'on_track': False, + 'timeline': '' + } + + def analyze_course_rigor(self, parsed_data: Dict) -> Dict: + """Analyze the difficulty level of courses taken""" + analysis = { + 'advanced_courses': 0, + 'honors_courses': 0, + 'ap_courses': 0, + 'ib_courses': 0, + 'de_courses': 0, + 'rating': '', + 'recommendations': [] } - df = pd.DataFrame(gpa_data) - fig = px.bar(df, x="Type", y="Value", title="GPA Comparison", - color="Type", text="Value", - color_discrete_sequence=["#4C78A8", "#F58518"]) - fig.update_traces(texttemplate='%{text:.2f}', textposition='outside') - fig.update_layout(yaxis_range=[0,5], uniformtext_minsize=8, uniformtext_mode='hide') - return fig - except: - return None - -def create_requirements_visualization(parsed_data: Dict): - try: - req_data = [] - for code, req in parsed_data.get('requirements', {}).items(): - if req and req.get('percent_complete'): - completion = float(req['percent_complete']) - req_data.append({ - "Requirement": code, - "Completion (%)": completion, - "Status": "Complete" if completion >= 100 else "Incomplete" - }) - if not req_data: - return None + try: + for course in parsed_data.get('course_history', []): + course_name = course.get('description', '').upper() + if 'AP' in course_name: + analysis['ap_courses'] += 1 + analysis['advanced_courses'] += 1 + elif 'IB' in course_name: + analysis['ib_courses'] += 1 + analysis['advanced_courses'] += 1 + elif 'DE' in course_name or 'DUAL ENROLLMENT' in course_name: + analysis['de_courses'] += 1 + analysis['advanced_courses'] += 1 + elif 'HONORS' in course_name: + analysis['honors_courses'] += 1 + analysis['advanced_courses'] += 1 - df = pd.DataFrame(req_data) - fig = px.bar(df, x="Requirement", y="Completion (%)", - title="Graduation Requirements Completion", - color="Status", - color_discrete_map={"Complete": "#2CA02C", "Incomplete": "#D62728"}, - hover_data=["Requirement"]) - fig.update_layout(xaxis={'categoryorder':'total descending'}) - return fig - except: - return None - -def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]: - """Process transcript file and return analysis results""" - try: - if not file_obj: - raise gr.Error("Please upload a transcript file first (PDF or image)") + total_advanced = analysis['advanced_courses'] + total_courses = len(parsed_data.get('course_history', [])) - validate_file(file_obj) - file_ext = os.path.splitext(file_obj.name)[1].lower() - - # Additional PDF validation - if file_ext == '.pdf': - try: - with open(file_obj.name, 'rb') as f: - PdfReader(f) # Test if PDF is readable - except Exception as e: - raise gr.Error(f"Invalid PDF file: {str(e)}. Please upload a non-corrupted PDF.") - - if progress: - progress(0.2, desc="Extracting text from file...") + if total_courses == 0: + return analysis + + advanced_percentage = (total_advanced / total_courses) * 100 + + if advanced_percentage >= 50: + analysis['rating'] = 'Very High Rigor' + analysis['recommendations'] = [ + "Your course rigor is excellent for college admissions", + "Consider adding 1-2 more advanced courses if manageable" + ] + elif advanced_percentage >= 30: + analysis['rating'] = 'High Rigor' + analysis['recommendations'] = [ + "Your course rigor is strong", + "Consider adding 1-2 more advanced courses next year" + ] + elif advanced_percentage >= 15: + analysis['rating'] = 'Moderate Rigor' + analysis['recommendations'] = [ + "Your course rigor is average", + "Consider adding more advanced courses to strengthen your profile" + ] + else: + analysis['rating'] = 'Low Rigor' + analysis['recommendations'] = [ + "Your course rigor is below average for college-bound students", + "Strongly consider adding advanced courses next semester", + "Meet with your counselor to discuss options" + ] + + return analysis + except: + return { + 'advanced_courses': 0, + 'honors_courses': 0, + 'ap_courses': 0, + 'ib_courses': 0, + 'de_courses': 0, + 'rating': 'Unknown', + 'recommendations': [] + } + + def generate_college_recommendations(self, parsed_data: Dict) -> Dict: + """Enhanced college recommendations based on full profile""" + recommendations = { + 'reach': [], + 'target': [], + 'safety': [], + 'scholarships': [], + 'improvement_areas': [] + } try: - text = extract_text_from_file(file_obj.name, file_ext) - except Exception as e: - raise ValueError(f"Failed to extract text: {str(e)}. The file may be corrupted or in an unsupported format.") - - if not text.strip(): - raise ValueError("The file appears to be empty or contains no readable text.") - - if progress: - progress(0.5, desc="Parsing transcript...") + # Get key metrics + weighted_gpa = float(parsed_data.get('student_info', {}).get('weighted_gpa', 0)) + rigor_analysis = self.analyze_course_rigor(parsed_data) + service_hours = int(parsed_data.get('student_info', {}).get('community_service_hours', 0)) - parser = TranscriptParser() - try: - parsed_data = parser.parse_transcript(text) - if not parsed_data: - raise ValueError("No data could be parsed from the transcript.") - except Exception as e: - raise ValueError(f"Couldn't parse transcript content. Error: {str(e)}") - - # Perform enhanced analyses - gpa_analysis = analyze_gpa(parsed_data) - grad_status = analyze_graduation_status(parsed_data) - advice = generate_advice(parsed_data) - college_recs = generate_college_recommendations(parsed_data) - gpa_viz = create_gpa_visualization(parsed_data) - req_viz = create_requirements_visualization(parsed_data) - - # Format results for display - results = [ - f"šŸ“Š GPA Analysis: {gpa_analysis}", - f"šŸŽ“ Graduation Status: {grad_status}", - f"šŸ’” Recommendations:\n{advice}", - f"šŸ« College Recommendations:\n{college_recs}" - ] - - # Store all analysis results in the parsed_data - parsed_data['analysis'] = { - 'gpa_analysis': gpa_analysis, - 'grad_status': grad_status, - 'advice': advice, - 'college_recs': college_recs, - 'visualizations': { - 'gpa_viz': gpa_viz, - 'req_viz': req_viz + # Determine college tiers + if weighted_gpa >= 4.3 and rigor_analysis['advanced_courses'] >= 8 and service_hours >= 100: + recommendations['reach'].extend([ + "Ivy League: Harvard, Yale, Princeton, Columbia, etc.", + "Stanford, MIT, CalTech, University of Chicago" + ]) + recommendations['target'].extend([ + "Top Public Universities: UCLA, UC Berkeley, UMich, UVA", + "Elite Liberal Arts: Williams, Amherst, Swarthmore" + ]) + elif weighted_gpa >= 4.0 and rigor_analysis['advanced_courses'] >= 6 and service_hours >= 80: + recommendations['reach'].extend([ + "Top 20 National Universities", + "Highly Selective Liberal Arts Colleges" + ]) + recommendations['target'].extend([ + "Top 50 National Universities", + "Selective Public Flagships", + "Top Liberal Arts Colleges" + ]) + elif weighted_gpa >= 3.7 and rigor_analysis['advanced_courses'] >= 4 and service_hours >= 60: + recommendations['reach'].extend([ + "Top 50 National Universities", + "Selective Liberal Arts Colleges" + ]) + recommendations['target'].extend([ + "State Flagship Universities", + "Good Regional Universities" + ]) + elif weighted_gpa >= 3.3 and rigor_analysis['advanced_courses'] >= 2 and service_hours >= 40: + recommendations['target'].extend([ + "State Universities", + "Many Private Colleges" + ]) + recommendations['safety'].extend([ + "Less Selective Private Colleges", + "Community Colleges with Transfer Programs" + ]) + else: + recommendations['target'].extend([ + "Open Admission Colleges", + "Some State Universities" + ]) + recommendations['safety'].extend([ + "Community Colleges", + "Technical Schools" + ]) + + # Scholarship recommendations + if weighted_gpa >= 4.0: + recommendations['scholarships'].extend([ + "National Merit Scholarship", + "Presidential Scholarships", + "College-Specific Full-Ride Scholarships" + ]) + elif weighted_gpa >= 3.7: + recommendations['scholarships'].extend([ + "Bright Futures (Florida)", + "State-Specific Merit Scholarships", + "Honors College Scholarships" + ]) + elif weighted_gpa >= 3.3: + recommendations['scholarships'].extend([ + "Local Community Scholarships", + "Special Interest Scholarships", + "First-Generation Student Programs" + ]) + + # Improvement areas + if weighted_gpa < 3.5: + recommendations['improvement_areas'].append("Improve GPA through focused study and tutoring") + if rigor_analysis['advanced_courses'] < 4: + recommendations['improvement_areas'].append("Take more advanced courses (AP/IB/DE/Honors)") + if service_hours < 50: + recommendations['improvement_areas'].append("Increase community service involvement") + + return recommendations + except: + return { + 'reach': ["Could not generate recommendations"], + 'target': [], + 'safety': [], + 'scholarships': [], + 'improvement_areas': [] } + + def generate_study_plan(self, parsed_data: Dict, learning_style: str) -> Dict: + """Generate personalized study plan based on learning style and courses""" + plan = { + 'weekly_schedule': {}, + 'study_strategies': [], + 'time_management_tips': [], + 'resource_recommendations': [] } - return "\n\n".join(results), parsed_data - - except Exception as e: - error_msg = f"Error processing transcript: {str(e)}" - logging.error(error_msg) - raise gr.Error(f"{error_msg}\n\nPossible solutions:\n1. Try a different file format\n2. Ensure text is clear and not handwritten\n3. Check file size (<5MB)") - -# ========== LEARNING STYLE QUIZ ========== -class LearningStyleQuiz: - def __init__(self): - self.questions = [ - "When you study for a test, you prefer to:", - "When you need directions to a new place, you prefer:", - "When you learn a new skill, you prefer to:", - "When you're trying to concentrate, you:", - "When you meet new people, you remember them by:", - "When you're assembling furniture or a gadget, you:", - "When choosing a restaurant, you rely most on:", - "When you're in a waiting room, you typically:", - "When giving someone instructions, you tend to:", - "When you're trying to recall information, you:", - "When you're at a museum or exhibit, you:", - "When you're learning a new language, you prefer:", - "When you're taking notes in class, you:", - "When you're explaining something complex, you:", - "When you're at a party, you enjoy:", - "When you're trying to remember a phone number, you:", - "When you're relaxing, you prefer to:", - "When you're learning to use new software, you:", - "When you're giving a presentation, you rely on:", - "When you're solving a difficult problem, you:" - ] - - self.options = [ - ["Read the textbook (Reading/Writing)", "Listen to lectures (Auditory)", "Use diagrams/charts (Visual)", "Practice problems (Kinesthetic)"], - ["Look at a map (Visual)", "Have someone tell you (Auditory)", "Write down directions (Reading/Writing)", "Try walking/driving there (Kinesthetic)"], - ["Read instructions (Reading/Writing)", "Have someone show you (Visual)", "Listen to explanations (Auditory)", "Try it yourself (Kinesthetic)"], - ["Need quiet (Reading/Writing)", "Need background noise (Auditory)", "Need to move around (Kinesthetic)", "Need visual stimulation (Visual)"], - ["Their face (Visual)", "Their name (Auditory)", "What you talked about (Reading/Writing)", "What you did together (Kinesthetic)"], - ["Read the instructions carefully (Reading/Writing)", "Look at the diagrams (Visual)", "Ask someone to explain (Auditory)", "Start putting pieces together (Kinesthetic)"], - ["Online photos of the food (Visual)", "Recommendations from friends (Auditory)", "Reading the menu online (Reading/Writing)", "Remembering how it felt to eat there (Kinesthetic)"], - ["Read magazines (Reading/Writing)", "Listen to music (Auditory)", "Watch TV (Visual)", "Fidget or move around (Kinesthetic)"], - ["Write them down (Reading/Writing)", "Explain verbally (Auditory)", "Demonstrate (Visual)", "Guide them physically (Kinesthetic)"], - ["See written words in your mind (Visual)", "Hear the information in your head (Auditory)", "Write it down to remember (Reading/Writing)", "Associate it with physical actions (Kinesthetic)"], - ["Read all the descriptions (Reading/Writing)", "Listen to audio guides (Auditory)", "Look at the displays (Visual)", "Touch interactive exhibits (Kinesthetic)"], - ["Study grammar rules (Reading/Writing)", "Listen to native speakers (Auditory)", "Use flashcards with images (Visual)", "Practice conversations (Kinesthetic)"], - ["Write detailed paragraphs (Reading/Writing)", "Record the lecture (Auditory)", "Draw diagrams and charts (Visual)", "Doodle while listening (Kinesthetic)"], - ["Write detailed steps (Reading/Writing)", "Explain verbally with examples (Auditory)", "Draw diagrams (Visual)", "Use physical objects to demonstrate (Kinesthetic)"], - ["Conversations with people (Auditory)", "Watching others or the environment (Visual)", "Writing notes or texting (Reading/Writing)", "Dancing or physical activities (Kinesthetic)"], - ["See the numbers in your head (Visual)", "Say them aloud (Auditory)", "Write them down (Reading/Writing)", "Dial them on a keypad (Kinesthetic)"], - ["Read a book (Reading/Writing)", "Listen to music (Auditory)", "Watch TV/movies (Visual)", "Do something physical (Kinesthetic)"], - ["Read the manual (Reading/Writing)", "Ask someone to show you (Visual)", "Call tech support (Auditory)", "Experiment with the software (Kinesthetic)"], - ["Detailed notes (Reading/Writing)", "Verbal explanations (Auditory)", "Visual slides (Visual)", "Physical demonstrations (Kinesthetic)"], - ["Write out possible solutions (Reading/Writing)", "Talk through it with someone (Auditory)", "Draw diagrams (Visual)", "Build a model or prototype (Kinesthetic)"] - ] - - self.learning_styles = { - "Visual": { - "description": "Visual learners prefer using images, diagrams, and spatial understanding.", - "tips": [ - "Use color coding in your notes", - "Create mind maps and diagrams", - "Watch educational videos", - "Use flashcards with images", - "Highlight important information in different colors" - ], - "careers": [ - "Graphic Designer", "Architect", "Photographer", - "Engineer", "Surgeon", "Pilot" - ] - }, - "Auditory": { - "description": "Auditory learners learn best through listening and speaking.", - "tips": [ - "Record lectures and listen to them", + try: + # Get current courses + current_courses = [ + course for course in parsed_data.get('course_history', []) + if course.get('status', '').lower() == 'in progress' + ] + + # Generate weekly schedule template + days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'] + for day in days: + plan['weekly_schedule'][day] = [] + + # Add study blocks based on learning style + study_blocks = 2 # Default + if learning_style.lower() == 'visual': + study_blocks = 3 + plan['study_strategies'].extend([ + "Create colorful mind maps for each subject", + "Use flashcards with images and diagrams", + "Watch educational videos on topics" + ]) + elif learning_style.lower() == 'auditory': + study_blocks = 2 + plan['study_strategies'].extend([ + "Record yourself explaining concepts and listen back", "Participate in study groups", - "Explain concepts out loud to yourself", - "Use rhymes or songs to remember information", "Listen to educational podcasts" - ], - "careers": [ - "Musician", "Journalist", "Lawyer", - "Psychologist", "Teacher", "Customer Service" - ] - }, - "Reading/Writing": { - "description": "These learners prefer information displayed as words.", - "tips": [ - "Write detailed notes", - "Create summaries in your own words", - "Read textbooks and articles", - "Make lists to organize information", + ]) + elif learning_style.lower() == 'reading/writing': + study_blocks = 4 + plan['study_strategies'].extend([ + "Write detailed summaries in your own words", + "Create question-answer sets for each topic", "Rewrite your notes to reinforce learning" - ], - "careers": [ - "Writer", "Researcher", "Editor", - "Accountant", "Programmer", "Historian" - ] - }, - "Kinesthetic": { - "description": "Kinesthetic learners learn through movement and hands-on activities.", - "tips": [ - "Use hands-on activities", - "Take frequent movement breaks", - "Create physical models", - "Associate information with physical actions", - "Study while walking or pacing" - ], - "careers": [ - "Athlete", "Chef", "Mechanic", - "Dancer", "Physical Therapist", "Carpenter" - ] + ]) + elif learning_style.lower() == 'kinesthetic': + study_blocks = 3 + plan['study_strategies'].extend([ + "Create physical models or demonstrations", + "Study while walking or moving", + "Use hands-on activities when possible" + ]) + + # Distribute study blocks + for i, course in enumerate(current_courses): + day_index = i % 5 # Monday-Friday + day = days[day_index] + plan['weekly_schedule'][day].append({ + 'course': course.get('description', 'Course'), + 'duration': '45-60 minutes', + 'activities': [ + "Review notes", + "Complete practice problems", + "Prepare questions for teacher" + ] + }) + + # Add time management tips + plan['time_management_tips'].extend([ + "Use the Pomodoro technique (25 min study, 5 min break)", + "Prioritize assignments by due date and importance", + "Schedule regular review sessions" + ]) + + # Add resource recommendations + plan['resource_recommendations'].extend([ + "Khan Academy for math and science", + "Quizlet for flashcards", + "Wolfram Alpha for math help" + ]) + + return plan + except: + return { + 'weekly_schedule': {'Error': ["Could not generate schedule"]}, + 'study_strategies': [], + 'time_management_tips': [], + 'resource_recommendations': [] } + +# Initialize academic analyzer +academic_analyzer = AcademicAnalyzer() + +# ========== ENHANCED VISUALIZATION FUNCTIONS ========== +class DataVisualizer: + def __init__(self): + self.color_palette = { + 'complete': '#4CAF50', + 'incomplete': '#F44336', + 'in_progress': '#FFC107', + 'gpa_weighted': '#3F51B5', + 'gpa_unweighted': '#9C27B0', + 'core': '#3498DB', + 'electives': '#2ECC71', + 'arts_pe': '#9B59B6' } - def evaluate_quiz(self, *answers) -> str: - """Evaluate quiz answers and return learning style results""" - answers = list(answers) - if len(answers) != len(self.questions): - raise gr.Error("Please answer all questions before submitting") - - scores = {style: 0 for style in self.learning_styles} - - for i, answer in enumerate(answers): - if not answer: - continue + def create_gpa_visualization(self, parsed_data: Dict): + """Enhanced GPA visualization with more details""" + try: + gpa_data = { + "Type": ["Weighted GPA", "Unweighted GPA"], + "Value": [ + float(parsed_data.get('student_info', {}).get('weighted_gpa', 0)), + float(parsed_data.get('student_info', {}).get('unweighted_gpa', 0)) + ], + "Color": [self.color_palette['gpa_weighted'], self.color_palette['gpa_unweighted']] + } + + df = pd.DataFrame(gpa_data) + + fig = px.bar( + df, + x="Type", + y="Value", + title="GPA Comparison", + color="Type", + color_discrete_map={ + "Weighted GPA": self.color_palette['gpa_weighted'], + "Unweighted GPA": self.color_palette['gpa_unweighted'] + }, + text="Value", + hover_data={"Type": True, "Value": ":.2f"} + ) + + # Add reference lines and annotations + fig.add_hline(y=4.0, line_dash="dot", line_color="green", annotation_text="Excellent", annotation_position="top left") + fig.add_hline(y=3.0, line_dash="dot", line_color="orange", annotation_text="Good", annotation_position="top left") + fig.add_hline(y=2.0, line_dash="dot", line_color="red", annotation_text="Minimum", annotation_position="top left") + + fig.update_traces( + texttemplate='%{text:.2f}', + textposition='outside', + marker_line_color='rgb(8,48,107)', + marker_line_width=1.5 + ) + + fig.update_layout( + yaxis_range=[0, 5], + uniformtext_minsize=8, + uniformtext_mode='hide', + plot_bgcolor='rgba(0,0,0,0)', + paper_bgcolor='rgba(0,0,0,0)', + font=dict(size=12) + ) + + return fig + except Exception as e: + logger.error(f"Error creating GPA visualization: {str(e)}") + return None + + def create_requirements_visualization(self, parsed_data: Dict): + """Enhanced requirements visualization with interactive elements""" + try: + req_data = [] + for code, req in parsed_data.get('requirements', {}).items(): + if req and req.get('percent_complete'): + completion = float(req['percent_complete']) + req_data.append({ + "Requirement": f"{code}: {req.get('description', '')[:30]}...", + "Completion (%)": completion, + "Status": "Complete" if completion >= 100 else "In Progress" if completion > 0 else "Not Started", + "Required": req.get('required', 0), + "Completed": req.get('completed', 0), + "Remaining": max(0, float(req.get('required', 0)) - float(req.get('completed', 0))) + }) + + if not req_data: + return None - for j, style in enumerate(self.learning_styles): - if answer == self.options[i][j]: - scores[style] += 1 - break - - total_answered = sum(1 for ans in answers if ans) - if total_answered == 0: - raise gr.Error("No answers provided") - - percentages = {style: (score/total_answered)*100 for style, score in scores.items()} - sorted_styles = sorted(scores.items(), key=lambda x: x[1], reverse=True) - - result = "## Your Learning Style Results\n\n" - result += "### Scores:\n" - for style, score in sorted_styles: - result += f"- **{style}**: {score}/{total_answered} ({percentages[style]:.1f}%)\n" - - max_score = max(scores.values()) - primary_styles = [style for style, score in scores.items() if score == max_score] - - result += "\n### Analysis:\n" - if len(primary_styles) == 1: - primary_style = primary_styles[0] - style_info = self.learning_styles[primary_style] - - result += f"Your primary learning style is **{primary_style}**\n\n" - result += f"**{primary_style} Characteristics**:\n" - result += f"{style_info['description']}\n\n" - - result += "**Recommended Study Strategies**:\n" - for tip in style_info['tips']: - result += f"- {tip}\n" - - result += "\n**Potential Career Paths**:\n" - for career in style_info['careers'][:6]: - result += f"- {career}\n" - - complementary = [s for s in sorted_styles if s[0] != primary_style][0][0] - result += f"\nYou might also benefit from some **{complementary}** strategies:\n" - for tip in self.learning_styles[complementary]['tips'][:3]: - result += f"- {tip}\n" - else: - result += "You have multiple strong learning styles:\n" - for style in primary_styles: - result += f"- **{style}**\n" - - result += "\n**Combined Learning Strategies**:\n" - result += "You may benefit from combining different learning approaches:\n" - for style in primary_styles: - result += f"\n**{style}** techniques:\n" - for tip in style_info['tips'][:2]: - result += f"- {tip}\n" + df = pd.DataFrame(req_data) + + fig = px.bar( + df, + x="Requirement", + y="Completion (%)", + title="Graduation Requirements Completion", + color="Status", + color_discrete_map={ + "Complete": self.color_palette['complete'], + "In Progress": self.color_palette['in_progress'], + "Not Started": self.color_palette['incomplete'] + }, + hover_data=["Required", "Completed", "Remaining"], + text="Completion (%)" + ) + + fig.update_traces( + texttemplate='%{text:.1f}%', + textposition='outside', + marker_line_color='rgb(8,48,107)', + marker_line_width=1.5 + ) + + fig.update_layout( + xaxis={'categoryorder':'total descending'}, + yaxis_range=[0, 100], + plot_bgcolor='rgba(0,0,0,0)', + paper_bgcolor='rgba(0,0,0,0)', + font=dict(size=12), + hovermode="x unified" + ) + + fig.add_hline(y=100, line_dash="dot", line_color="green") + + return fig + except Exception as e: + logger.error(f"Error creating requirements visualization: {str(e)}") + return None + + def create_credits_distribution_visualization(self, parsed_data: Dict): + """Enhanced credits distribution visualization""" + try: + core_credits = sum( + req['completed'] for req in parsed_data.get('requirements', {}).values() + if req and req.get('code', '').split('-')[0] in ['A', 'B', 'C', 'D'] # English, Math, Science, Social Studies + ) + + elective_credits = sum( + req['completed'] for req in parsed_data.get('requirements', {}).values() + if req and req.get('code', '').split('-')[0] in ['G', 'H'] # Electives + ) + + other_credits = sum( + req['completed'] for req in parsed_data.get('requirements', {}).values() + if req and req.get('code', '').split('-')[0] in ['E', 'F'] # Arts, PE + ) + + credit_values = [core_credits, elective_credits, other_credits] + credit_labels = ['Core Subjects', 'Electives', 'Arts/PE'] + + if sum(credit_values) == 0: + return None - result += f"\n**{style}** career suggestions:\n" - for career in style_info['careers'][:3]: - result += f"- {career}\n" - - return result + df = pd.DataFrame({ + "Category": credit_labels, + "Credits": credit_values, + "Color": [self.color_palette['core'], self.color_palette['electives'], self.color_palette['arts_pe']] + }) + + fig = px.pie( + df, + values="Credits", + names="Category", + title="Credit Distribution", + color="Category", + color_discrete_map={ + "Core Subjects": self.color_palette['core'], + "Electives": self.color_palette['electives'], + "Arts/PE": self.color_palette['arts_pe'] + }, + hole=0.3 + ) + + fig.update_traces( + textposition='inside', + textinfo='percent+label', + marker=dict(line=dict(color='#FFFFFF', width=2)) + + fig.update_layout( + plot_bgcolor='rgba(0,0,0,0)', + paper_bgcolor='rgba(0,0,0,0)', + font=dict(size=12), + showlegend=False + ) + + return fig + except Exception as e: + logger.error(f"Error creating credits visualization: {str(e)}") + return None + + def create_course_rigor_visualization(self, parsed_data: Dict): + """Visualization of course rigor analysis""" + try: + rigor = academic_analyzer.analyze_course_rigor(parsed_data) + + data = { + "Type": ["AP", "IB", "DE", "Honors"], + "Count": [rigor['ap_courses'], rigor['ib_courses'], rigor['de_courses'], rigor['honors_courses']], + "Color": ["#E91E63", "#673AB7", "#009688", "#FF9800"] + } + + df = pd.DataFrame(data) + + fig = px.bar( + df, + x="Type", + y="Count", + title="Advanced Course Breakdown", + color="Type", + color_discrete_map={ + "AP": "#E91E63", + "IB": "#673AB7", + "DE": "#009688", + "Honors": "#FF9800" + }, + text="Count" + ) + + fig.update_traces( + textposition='outside', + marker_line_color='rgb(8,48,107)', + marker_line_width=1.5 + ) + + fig.update_layout( + plot_bgcolor='rgba(0,0,0,0)', + paper_bgcolor='rgba(0,0,0,0)', + font=dict(size=12), + xaxis_title="Course Type", + yaxis_title="Number of Courses" + ) + + return fig + except Exception as e: + logger.error(f"Error creating course rigor visualization: {str(e)}") + return None -learning_style_quiz = LearningStyleQuiz() +# Initialize visualizer +data_visualizer = DataVisualizer() -# ========== PROFILE MANAGEMENT ========== -class ProfileManager: +# ========== ENHANCED PROFILE MANAGEMENT ========== +class EnhancedProfileManager: def __init__(self): self.profiles_dir = Path(PROFILES_DIR) self.profiles_dir.mkdir(exist_ok=True, parents=True) self.current_session = None + self.encryptor = DataEncryptor(ENCRYPTION_KEY) def set_session(self, session_token: str) -> None: self.current_session = session_token def get_profile_path(self, name: str) -> Path: + name_hash = hashlib.sha256(name.encode()).hexdigest()[:16] if self.current_session: - name_hash = hashlib.sha256(name.encode()).hexdigest()[:16] return self.profiles_dir / f"{name_hash}_{self.current_session}_profile.json" - return self.profiles_dir / f"{name.replace(' ', '_')}_profile.json" + return self.profiles_dir / f"{name_hash}_profile.json" def save_profile(self, name: str, age: Union[int, str], interests: str, transcript: Dict, learning_style: str, movie: str, movie_reason: str, show: str, show_reason: str, book: str, book_reason: str, character: str, character_reason: str, - blog: str) -> str: + blog: str, study_plan: Dict = None) -> str: + """Enhanced profile saving with encryption and validation""" try: name = validate_name(name) age = validate_age(age) @@ -1218,9 +1478,10 @@ class ProfileManager: if not transcript: raise ValueError("Please complete the transcript analysis first.") - if not learning_style or "Your primary learning style is:" not in learning_style: + if not learning_style or "Your primary learning style is" not in learning_style: raise ValueError("Please complete the learning style quiz first.") + # Prepare favorites with sanitization favorites = { "movie": sanitize_input(movie), "movie_reason": sanitize_input(movie_reason), @@ -1232,89 +1493,124 @@ class ProfileManager: "character_reason": sanitize_input(character_reason) } + # Generate study plan if not provided + if not study_plan: + learning_style_match = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*", learning_style) + if learning_style_match: + study_plan = academic_analyzer.generate_study_plan( + transcript, + learning_style_match.group(1) + ) + + # Prepare data with encryption for sensitive fields data = { - "name": name, + "name": self.encryptor.encrypt(name), "age": age, - "interests": sanitize_input(interests), - "transcript": transcript, + "interests": self.encryptor.encrypt(sanitize_input(interests)), + "transcript": transcript, # Already sanitized during parsing "learning_style": learning_style, "favorites": favorites, - "blog": sanitize_input(blog) if blog else "", + "blog": self.encryptor.encrypt(sanitize_input(blog)) if blog else "", + "study_plan": study_plan if study_plan else {}, "session_token": self.current_session, - "last_updated": time.time() + "last_updated": time.time(), + "version": "2.0" # Profile version for compatibility } filepath = self.get_profile_path(name) - with open(filepath, "w", encoding='utf-8') as f: + # Save with atomic write + temp_path = filepath.with_suffix('.tmp') + with open(temp_path, "w", encoding='utf-8') as f: json.dump(data, f, indent=2, ensure_ascii=False) + temp_path.replace(filepath) # Atomic replace - if HF_TOKEN and 'hf_api' in globals(): + # Optional cloud backup + if HF_TOKEN and hf_api: try: hf_api.upload_file( path_or_fileobj=filepath, path_in_repo=f"profiles/{filepath.name}", repo_id="your-username/student-learning-assistant", - repo_type="dataset" + repo_type="dataset", + commit_message=f"Profile update for {name}" ) except Exception as e: - logging.error(f"Failed to upload to HF Hub: {str(e)}") + logger.error(f"Failed to upload to HF Hub: {str(e)}") - # Return simple confirmation with GPA if available - confirmation = f"Profile saved successfully for {name}." - if 'gpa' in data.get('transcript', {}).get('student_info', {}): - confirmation += f"\nGPA: {data['transcript']['student_info']['gpa']}" - return confirmation + return f"Profile saved successfully for {name}." except Exception as e: - logging.error(f"Profile validation error: {str(e)}") + logger.error(f"Profile save error: {str(e)}") raise gr.Error(f"Couldn't save profile: {str(e)}") - + def load_profile(self, name: str = None, session_token: str = None) -> Dict: - try: - if session_token: - profile_pattern = f"*{session_token}_profile.json" - else: - profile_pattern = "*.json" - - profiles = list(self.profiles_dir.glob(profile_pattern)) - if not profiles: - return {} - - if name: - name_hash = hashlib.sha256(name.encode()).hexdigest()[:16] + """Enhanced profile loading with decryption and retries""" + for attempt in range(MAX_PROFILE_LOAD_ATTEMPTS): + try: if session_token: - profile_file = self.profiles_dir / f"{name_hash}_{session_token}_profile.json" + profile_pattern = f"*{session_token}_profile.json" else: - profile_file = self.profiles_dir / f"{name_hash}_profile.json" + profile_pattern = "*.json" - if not profile_file.exists(): - if HF_TOKEN and 'hf_api' in globals(): - try: - hf_api.download_file( - path_in_repo=f"profiles/{profile_file.name}", - repo_id="your-username/student-learning-assistant", - repo_type="dataset", - local_dir=self.profiles_dir - ) - except: + profiles = list(self.profiles_dir.glob(profile_pattern)) + if not profiles: + return {} + + if name: + profile_file = self.get_profile_path(name) + if not profile_file.exists(): + # Try to download from Hugging Face Hub + if HF_TOKEN and hf_api: + try: + hf_api.download_file( + path_in_repo=f"profiles/{profile_file.name}", + repo_id="your-username/student-learning-assistant", + repo_type="dataset", + local_dir=self.profiles_dir + ) + except Exception as e: + logger.warning(f"Failed to download profile: {str(e)}") + raise gr.Error(f"No profile found for {name}") + else: raise gr.Error(f"No profile found for {name}") - else: - raise gr.Error(f"No profile found for {name}") - else: - profile_file = profiles[0] - - with open(profile_file, "r", encoding='utf-8') as f: - profile_data = json.load(f) + else: + # Load most recently modified profile + profiles.sort(key=lambda x: x.stat().st_mtime, reverse=True) + profile_file = profiles[0] + + with open(profile_file, "r", encoding='utf-8') as f: + profile_data = json.load(f) + + # Check session timeout if time.time() - profile_data.get('last_updated', 0) > SESSION_TIMEOUT: raise gr.Error("Session expired. Please start a new session.") + + # Decrypt encrypted fields + if profile_data.get('version', '1.0') == '2.0': + try: + profile_data['name'] = self.encryptor.decrypt(profile_data['name']) + profile_data['interests'] = self.encryptor.decrypt(profile_data.get('interests', '')) + if profile_data.get('blog'): + profile_data['blog'] = self.encryptor.decrypt(profile_data['blog']) + except Exception as e: + logger.error(f"Decryption error: {str(e)}") + raise gr.Error("Failed to decrypt profile data") + return profile_data - - except Exception as e: - logging.error(f"Error loading profile: {str(e)}") - return {} + + except json.JSONDecodeError as e: + if attempt == MAX_PROFILE_LOAD_ATTEMPTS - 1: + logger.error(f"Failed to load profile after {MAX_PROFILE_LOAD_ATTEMPTS} attempts") + raise gr.Error("Corrupted profile data") + time.sleep(0.5 * (attempt + 1)) + except Exception as e: + if attempt == MAX_PROFILE_LOAD_ATTEMPTS - 1: + raise + time.sleep(0.5 * (attempt + 1)) def list_profiles(self, session_token: str = None) -> List[str]: + """List available profiles with decrypted names""" if session_token: profiles = list(self.profiles_dir.glob(f"*{session_token}_profile.json")) else: @@ -1322,45 +1618,402 @@ class ProfileManager: profile_names = [] for p in profiles: - with open(p, "r", encoding='utf-8') as f: - try: + try: + with open(p, "r", encoding='utf-8') as f: data = json.load(f) - profile_names.append(data.get('name', p.stem)) - except json.JSONDecodeError: - continue + if data.get('version', '1.0') == '2.0': + try: + name = self.encryptor.decrypt(data['name']) + profile_names.append(name) + except: + profile_names.append(p.stem) + else: + profile_names.append(data.get('name', p.stem)) + except: + continue return profile_names + + def delete_profile(self, name: str, session_token: str = None) -> bool: + """Delete a profile with verification""" + try: + profile_file = self.get_profile_path(name) + if not profile_file.exists(): + return False + + # Verify the profile belongs to the current session + with open(profile_file, "r", encoding='utf-8') as f: + data = json.load(f) + if session_token and data.get('session_token') != session_token: + return False + + # Delete local file + profile_file.unlink() + + # Try to delete from Hugging Face Hub + if HF_TOKEN and hf_api: + try: + hf_api.delete_file( + path_in_repo=f"profiles/{profile_file.name}", + repo_id="your-username/student-learning-assistant", + repo_type="dataset" + ) + except Exception as e: + logger.error(f"Failed to delete from HF Hub: {str(e)}") + + return True + except Exception as e: + logger.error(f"Error deleting profile: {str(e)}") + return False -profile_manager = ProfileManager() +# Initialize enhanced profile manager +profile_manager = EnhancedProfileManager() -# ========== AI TEACHING ASSISTANT ========== -class TeachingAssistant: +# ========== ENHANCED AI TEACHING ASSISTANT ========== +class EnhancedTeachingAssistant: def __init__(self): self.context_history = [] - self.max_context_length = 5 + self.max_context_length = MAX_CONTEXT_HISTORY + self.model, self.tokenizer = None, None + self.last_model_load_attempt = 0 + + async def initialize_model(self): + """Lazy initialize the model with retries""" + if not self.model or not self.tokenizer: + if time.time() - self.last_model_load_attempt > 3600: # Retry every hour if failed + self.model, self.tokenizer = get_model_and_tokenizer() + self.last_model_load_attempt = time.time() async def generate_response(self, message: str, history: List[List[Union[str, None]]], session_token: str) -> str: + """Enhanced response generation with context awareness""" try: + await self.initialize_model() + profile = profile_manager.load_profile(session_token=session_token) if not profile: - return "Please complete and save your profile first." + return "Please complete and save your profile first to get personalized advice." self._update_context(message, history) - # Focus on GPA if mentioned - if "gpa" in message.lower(): - gpa = profile.get("transcript", {}).get("student_info", {}).get("gpa", "unknown") - return f"Your GPA is {gpa}. Would you like advice on improving it?" + # Get relevant profile information + student_name = profile.get('name', 'Student') + gpa = profile.get('transcript', {}).get('student_info', {}).get('weighted_gpa', None) + learning_style = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*", + profile.get('learning_style', '')) + learning_style = learning_style.group(1) if learning_style else None + + # Prepare context for the model + context = f"You are an AI teaching assistant helping {student_name}. " + if gpa: + context += f"{student_name}'s current weighted GPA is {gpa}. " + if learning_style: + context += f"They are a {learning_style.lower()} learner. " + + # Add recent conversation history + if self.context_history: + context += "Recent conversation:\n" + for item in self.context_history[-self.max_context_length:]: + role = "Student" if item['role'] == 'user' else "Assistant" + context += f"{role}: {item['content']}\n" + + # Generate response based on query type + query_type = self._classify_query(message) + response = await self._generate_typed_response(query_type, message, context, profile) - # Generic response otherwise - return "I'm your learning assistant. Ask me about your GPA, courses, or study tips." + return response + + except Exception as e: + logger.error(f"Error generating response: {str(e)}") + return "I encountered an error processing your request. Please try again." + + def _classify_query(self, message: str) -> str: + """Classify the type of user query""" + message_lower = message.lower() + + if any(word in message_lower for word in ['gpa', 'grade', 'average']): + return 'gpa' + elif any(word in message_lower for word in ['study', 'learn', 'exam', 'test']): + return 'study' + elif any(word in message_lower for word in ['course', 'class', 'subject']): + return 'courses' + elif any(word in message_lower for word in ['college', 'university', 'apply']): + return 'college' + elif any(word in message_lower for word in ['plan', 'schedule', 'calendar']): + return 'planning' + elif any(word in message_lower for word in ['resource', 'book', 'website']): + return 'resources' + else: + return 'general' + + async def _generate_typed_response(self, query_type: str, message: str, context: str, profile: Dict) -> str: + """Generate response based on query type""" + if query_type == 'gpa': + return self._generate_gpa_response(profile) + elif query_type == 'study': + return self._generate_study_response(profile) + elif query_type == 'courses': + return self._generate_courses_response(profile) + elif query_type == 'college': + return self._generate_college_response(profile) + elif query_type == 'planning': + return self._generate_planning_response(profile) + elif query_type == 'resources': + return self._generate_resources_response(profile) + else: + return await self._generate_general_response(message, context) + + def _generate_gpa_response(self, profile: Dict) -> str: + """Generate response about GPA""" + gpa = profile.get('transcript', {}).get('student_info', {}).get('weighted_gpa', None) + if not gpa: + return "I couldn't find your GPA information. Please upload your transcript first." + + analysis = academic_analyzer.analyze_gpa(profile['transcript']) + response = [ + f"Your current weighted GPA is **{gpa}**.", + analysis['description'], + analysis['comparison'] + ] + + if analysis['improvement_tips']: + response.append("\n**Improvement Tips:**") + response.extend([f"- {tip}" for tip in analysis['improvement_tips']]) + + return "\n\n".join(response) + + def _generate_study_response(self, profile: Dict) -> str: + """Generate study advice based on learning style""" + learning_style_match = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*", + profile.get('learning_style', '')) + if not learning_style_match: + return "Please complete the learning style quiz first to get personalized study advice." + + learning_style = learning_style_match.group(1) + study_plan = profile.get('study_plan', {}) + + response = [ + f"As a **{learning_style}** learner, here are some study strategies for you:" + ] + + if study_plan.get('study_strategies'): + response.extend([f"- {strategy}" for strategy in study_plan['study_strategies']]) + else: + # Fallback if no study plan + if learning_style.lower() == 'visual': + response.extend([ + "- Use color coding in your notes", + "- Create mind maps and diagrams", + "- Watch educational videos to visualize concepts" + ]) + elif learning_style.lower() == 'auditory': + response.extend([ + "- Record lectures and listen to them", + "- Explain concepts out loud to yourself", + "- Participate in study groups" + ]) + elif learning_style.lower() == 'reading/writing': + response.extend([ + "- Write detailed summaries in your own words", + "- Create question-answer sets for each topic", + "- Rewrite your notes to reinforce learning" + ]) + elif learning_style.lower() == 'kinesthetic': + response.extend([ + "- Use hands-on activities when possible", + "- Study while moving or pacing", + "- Create physical models to represent concepts" + ]) + + if study_plan.get('time_management_tips'): + response.append("\n**Time Management Tips:**") + response.extend([f"- {tip}" for tip in study_plan['time_management_tips']]) + + return "\n\n".join(response) + + def _generate_courses_response(self, profile: Dict) -> str: + """Generate response about current/past courses""" + transcript = profile.get('transcript', {}) + if not transcript.get('course_history'): + return "I couldn't find your course information. Please upload your transcript first." + + # Get current courses (in progress) + current_courses = [ + course for course in transcript['course_history'] + if course.get('status', '').lower() == 'in progress' + ] + + # Get past completed courses + completed_courses = [ + course for course in transcript['course_history'] + if course.get('status', '').lower() == 'completed' + ] + + response = [] + + if current_courses: + response.append("**Your Current Courses:**") + for course in current_courses[:5]: # Limit to 5 courses + response.append( + f"- {course.get('description', 'Unknown')} " + f"({course.get('course_code', '')})" + ) + else: + response.append("I couldn't find any current courses in your transcript.") + + if completed_courses: + response.append("\n**Recently Completed Courses:**") + for course in completed_courses[:5]: # Limit to 5 courses + grade = course.get('grade_earned', '') + if grade: + response.append( + f"- {course.get('description', 'Unknown')} " + f"(Grade: {grade})" + ) + else: + response.append(f"- {course.get('description', 'Unknown')}") + # Add rigor analysis + rigor = academic_analyzer.analyze_course_rigor(transcript) + if rigor['rating']: + response.append(f"\n**Course Rigor Analysis:** {rigor['rating']}") + if rigor['recommendations']: + response.append("\n**Recommendations:**") + response.extend([f"- {rec}" for rec in rigor['recommendations']]) + + return "\n".join(response) + + def _generate_college_response(self, profile: Dict) -> str: + """Generate college recommendations""" + recommendations = academic_analyzer.generate_college_recommendations(profile.get('transcript', {})) + + response = ["**College Recommendations Based on Your Profile:**"] + + if recommendations['reach']: + response.append("\n**Reach Schools (Competitive):**") + response.extend([f"- {school}" for school in recommendations['reach'][:3]]) + + if recommendations['target']: + response.append("\n**Target Schools (Good Match):**") + response.extend([f"- {school}" for school in recommendations['target'][:3]]) + + if recommendations['safety']: + response.append("\n**Safety Schools (Likely Admission):**") + response.extend([f"- {school}" for school in recommendations['safety'][:3]]) + + if recommendations['scholarships']: + response.append("\n**Scholarship Opportunities:**") + response.extend([f"- {scholarship}" for scholarship in recommendations['scholarships'][:3]]) + + if recommendations['improvement_areas']: + response.append("\n**Areas to Improve for College Admissions:**") + response.extend([f"- {area}" for area in recommendations['improvement_areas']]) + + return "\n".join(response) + + def _generate_planning_response(self, profile: Dict) -> str: + """Generate study/schedule planning advice""" + study_plan = profile.get('study_plan', {}) + + response = ["**Study Planning Advice:**"] + + if study_plan.get('weekly_schedule'): + response.append("\nHere's a suggested weekly study schedule:") + for day, activities in study_plan['weekly_schedule'].items(): + if activities: + response.append(f"\n**{day}:**") + for activity in activities[:2]: # Show 2 activities per day max + response.append( + f"- {activity.get('course', 'Course')}: " + f"{activity.get('duration', '45-60 minutes')}" + ) + else: + response.append("\nA good study schedule should include:") + response.append("- 45-60 minute study blocks with short breaks") + response.append("- Focus on 1-2 subjects per day") + response.append("- Regular review sessions") + + if study_plan.get('time_management_tips'): + response.append("\n**Time Management Tips:**") + response.extend([f"- {tip}" for tip in study_plan['time_management_tips'][:3]]) + + return "\n".join(response) + + def _generate_resources_response(self, profile: Dict) -> str: + """Generate resource recommendations""" + study_plan = profile.get('study_plan', {}) + transcript = profile.get('transcript', {}) + + response = ["**Recommended Learning Resources:**"] + + # General resources + if study_plan.get('resource_recommendations'): + response.extend([f"- {resource}" for resource in study_plan['resource_recommendations'][:3]]) + else: + response.extend([ + "- Khan Academy (free lessons on many subjects)", + "- Quizlet (flashcards and study tools)", + "- Wolfram Alpha (math and science help)" + ]) + + # Subject-specific resources + current_courses = [ + course for course in transcript.get('course_history', []) + if course.get('status', '').lower() == 'in progress' + ] + + if current_courses: + response.append("\n**Course-Specific Resources:**") + for course in current_courses[:2]: # Limit to 2 courses + course_name = course.get('description', 'your course') + if 'MATH' in course_name.upper(): + response.append(f"- For {course_name}: Desmos Graphing Calculator, Art of Problem Solving") + elif 'SCIENCE' in course_name.upper(): + response.append(f"- For {course_name}: PhET Simulations, Crash Course Science videos") + elif 'HISTORY' in course_name.upper(): + response.append(f"- For {course_name}: Crash Course History videos, Library of Congress resources") + + return "\n".join(response) + + async def _generate_general_response(self, message: str, context: str) -> str: + """Generate response using the language model""" + if not self.model or not self.tokenizer: + return "I'm still loading my knowledge base. Please try again in a moment." + + try: + prompt = f"{context}\nStudent: {message}\nAssistant:" + + inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device) + + # Generate response with more controlled parameters + outputs = self.model.generate( + **inputs, + max_new_tokens=200, + temperature=0.7, + top_p=0.9, + repetition_penalty=1.1, + do_sample=True + ) + + response = self.tokenizer.decode(outputs[0], skip_special_tokens=True) + + # Extract just the assistant's response + response = response[len(prompt):].strip() + + # Clean up any incomplete sentences + if response and response[-1] not in {'.', '!', '?'}: + last_period = response.rfind('.') + if last_period > 0: + response = response[:last_period + 1] + + return response if response else "I'm not sure how to respond to that. Could you rephrase your question?" except Exception as e: - logging.error(f"Error generating response: {str(e)}") - return "I encountered an error. Please try again." + logger.error(f"Model generation error: {str(e)}") + return "I encountered an error generating a response. Please try again." def _update_context(self, message: str, history: List[List[Union[str, None]]]) -> None: + """Update conversation context""" self.context_history.append({"role": "user", "content": message}) + if history: for h in history[-self.max_context_length:]: if h[0]: @@ -1368,12 +2021,276 @@ class TeachingAssistant: if h[1]: self.context_history.append({"role": "assistant", "content": h[1]}) - self.context_history = self.context_history[-(self.max_context_length*2):] + # Trim to max context length + self.context_history = self.context_history[-(self.max_context_length * 2):] + +# Initialize enhanced teaching assistant +teaching_assistant = EnhancedTeachingAssistant() + +# ========== STUDY CALENDAR INTEGRATION ========== +class StudyCalendar: + def __init__(self): + self.calendar_events = {} + + def generate_study_calendar(self, profile: Dict, start_date: str = None, weeks: int = 4) -> Dict: + """Generate a study calendar for the given profile""" + try: + if not start_date: + start_date = datetime.date.today().isoformat() + + start_date = datetime.date.fromisoformat(start_date) + study_plan = profile.get('study_plan', {}) + + calendar = { + 'start_date': start_date.isoformat(), + 'end_date': (start_date + datetime.timedelta(weeks=weeks)).isoformat(), + 'events': [], + 'exams': [], + 'assignments': [] + } + + # Add study sessions from the study plan + if study_plan.get('weekly_schedule'): + for day_offset in range(weeks * 7): + current_date = start_date + datetime.timedelta(days=day_offset) + day_name = calendar.day_name[current_date.weekday()] + + if day_name in study_plan['weekly_schedule']: + for session in study_plan['weekly_schedule'][day_name]: + calendar['events'].append({ + 'date': current_date.isoformat(), + 'title': f"Study {session.get('course', '')}", + 'description': "\n".join(session.get('activities', [])), + 'duration': session.get('duration', '45-60 minutes'), + 'type': 'study' + }) + + # Add exam dates from transcript (if available) + transcript = profile.get('transcript', {}) + if transcript.get('course_history'): + for course in transcript['course_history']: + if course.get('status', '').lower() == 'in progress': + # Simulate some exam dates (in a real app, these would come from the school calendar) + midterm_date = (start_date + datetime.timedelta(weeks=2)).isoformat() + final_date = (start_date + datetime.timedelta(weeks=weeks - 1)).isoformat() + + calendar['exams'].append({ + 'date': midterm_date, + 'title': f"{course.get('description', 'Course')} Midterm", + 'course': course.get('description', ''), + 'type': 'exam' + }) + + calendar['exams'].append({ + 'date': final_date, + 'title': f"{course.get('description', 'Course')} Final", + 'course': course.get('description', ''), + 'type': 'exam' + }) + + return calendar + except Exception as e: + logger.error(f"Error generating calendar: {str(e)}") + return { + 'start_date': datetime.date.today().isoformat(), + 'end_date': (datetime.date.today() + datetime.timedelta(weeks=4)).isoformat(), + 'events': [], + 'exams': [], + 'assignments': [] + } + + def create_calendar_visualization(self, calendar_data: Dict) -> Optional[plt.Figure]: + """Create a visualization of the study calendar""" + try: + import matplotlib.pyplot as plt + from matplotlib.patches import Rectangle + + # Prepare data + start_date = datetime.date.fromisoformat(calendar_data['start_date']) + end_date = datetime.date.fromisoformat(calendar_data['end_date']) + days = (end_date - start_date).days + 1 + + # Create figure + fig, ax = plt.subplots(figsize=(12, 6)) + + # Draw week grid + for i in range(0, days, 7): + ax.add_patch(Rectangle((i, 0), 7, 1, color='#f5f5f5')) + + # Add study events + for event in calendar_data['events']: + event_date = datetime.date.fromisoformat(event['date']) + day_offset = (event_date - start_date).days + ax.add_patch(Rectangle((day_offset, 0.7), 1, 0.3, color='#4CAF50')) + + # Add exams + for exam in calendar_data['exams']: + exam_date = datetime.date.fromisoformat(exam['date']) + day_offset = (exam_date - start_date).days + ax.add_patch(Rectangle((day_offset, 0.3), 1, 0.3, color='#F44336')) + + # Configure axes + ax.set_xlim(0, days) + ax.set_ylim(0, 1) + ax.set_xticks(range(0, days, 7)) + ax.set_xticklabels([(start_date + datetime.timedelta(days=x)).strftime('%b %d') + for x in range(0, days, 7)]) + ax.set_yticks([0.5]) + ax.set_yticklabels(['Study Calendar']) + + # Add legend + ax.add_patch(Rectangle((days-5, 0.7), 1, 0.3, color='#4CAF50')) + ax.text(days-3.5, 0.85, 'Study Sessions', va='center') + ax.add_patch(Rectangle((days-5, 0.3), 1, 0.3, color='#F44336')) + ax.text(days-3.5, 0.45, 'Exams', va='center') + + plt.title(f"Study Calendar: {start_date.strftime('%b %d')} to {end_date.strftime('%b %d')}") + plt.tight_layout() + + return fig + except Exception as e: + logger.error(f"Error creating calendar visualization: {str(e)}") + return None + +# Initialize study calendar +study_calendar = StudyCalendar() + +# ========== GOAL TRACKING SYSTEM ========== +class GoalTracker: + def __init__(self): + self.goals = {} + + def add_goal(self, profile_name: str, goal_type: str, description: str, + target_date: str, target_value: float = None) -> bool: + """Add a new goal for the student""" + try: + goal_id = hashlib.sha256(f"{profile_name}{goal_type}{description}{time.time()}".encode()).hexdigest()[:16] + + self.goals[goal_id] = { + 'profile_name': profile_name, + 'type': goal_type, + 'description': description, + 'target_date': target_date, + 'target_value': target_value, + 'created': time.time(), + 'progress': [], + 'completed': False + } + + return True + except Exception as e: + logger.error(f"Error adding goal: {str(e)}") + return False + + def update_goal_progress(self, goal_id: str, progress_value: float, notes: str = "") -> bool: + """Update progress toward a goal""" + try: + if goal_id not in self.goals: + return False + + self.goals[goal_id]['progress'].append({ + 'date': time.time(), + 'value': progress_value, + 'notes': notes + }) + + # Check if goal is completed + if self.goals[goal_id].get('target_value') is not None: + if progress_value >= self.goals[goal_id]['target_value']: + self.goals[goal_id]['completed'] = True + + return True + except Exception as e: + logger.error(f"Error updating goal: {str(e)}") + return False + + def get_goals(self, profile_name: str) -> List[Dict]: + """Get all goals for a student""" + return [ + {**goal, 'id': goal_id} + for goal_id, goal in self.goals.items() + if goal['profile_name'] == profile_name + ] + + def create_goal_visualization(self, goals: List[Dict]) -> Optional[plt.Figure]: + """Create a visualization of goal progress""" + try: + import matplotlib.pyplot as plt + + if not goals: + return None + + # Prepare data + goal_names = [goal['description'][:20] + ('...' if len(goal['description']) > 20 else '') + for goal in goals] + progress_values = [ + goal['progress'][-1]['value'] if goal['progress'] else 0 + for goal in goals + ] + target_values = [ + goal['target_value'] if goal['target_value'] is not None else progress_values[i] + for i, goal in enumerate(goals) + ] + + # Create figure + fig, ax = plt.subplots(figsize=(10, 6)) + + # Plot bars + x = range(len(goals)) + bar_width = 0.35 + + progress_bars = ax.bar( + [i - bar_width/2 for i in x], + progress_values, + bar_width, + label='Current Progress', + color='#4CAF50' + ) + + target_bars = ax.bar( + [i + bar_width/2 for i in x], + target_values, + bar_width, + label='Target', + color='#2196F3' + ) + + # Add labels and title + ax.set_xlabel('Goals') + ax.set_ylabel('Progress') + ax.set_title('Goal Progress Tracking') + ax.set_xticks(x) + ax.set_xticklabels(goal_names, rotation=45, ha='right') + ax.legend() + + # Add value labels + for bar in progress_bars: + height = bar.get_height() + ax.annotate(f'{height:.1f}', + xy=(bar.get_x() + bar.get_width() / 2, height), + xytext=(0, 3), + textcoords="offset points", + ha='center', va='bottom') + + for bar in target_bars: + height = bar.get_height() + ax.annotate(f'{height:.1f}', + xy=(bar.get_x() + bar.get_width() / 2, height), + xytext=(0, 3), + textcoords="offset points", + ha='center', va='bottom') + + plt.tight_layout() + return fig + except Exception as e: + logger.error(f"Error creating goal visualization: {str(e)}") + return None -teaching_assistant = TeachingAssistant() +# Initialize goal tracker +goal_tracker = GoalTracker() -# ========== GRADIO INTERFACE ========== -def create_interface(): +# ========== ENHANCED GRADIO INTERFACE ========== +def create_enhanced_interface(): with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app: session_token = gr.State(value=generate_session_token()) profile_manager.set_session(session_token.value) @@ -1383,60 +2300,190 @@ def create_interface(): 1: False, # Learning Style Quiz 2: False, # Personal Questions 3: False, # Save & Review - 4: False # AI Assistant + 4: False, # AI Assistant + 5: False # Goals & Planning }) - # Custom CSS + # Custom CSS with enhanced styling app.css = """ - .gradio-container { max-width: 1200px !important; margin: 0 auto !important; } - .tab-content { padding: 20px !important; border: 1px solid #e0e0e0 !important; border-radius: 8px !important; margin-top: 10px !important; } - .completed-tab { background: #4CAF50 !important; color: white !important; } - .incomplete-tab { background: #E0E0E0 !important; } - .nav-message { padding: 10px; margin: 10px 0; border-radius: 4px; background-color: #ffebee; color: #c62828; } - .file-upload { border: 2px dashed #4CAF50 !important; padding: 20px !important; border-radius: 8px !important; text-align: center; } - .file-upload:hover { background: #f5f5f5; } - .progress-bar { height: 5px; background: linear-gradient(to right, #4CAF50, #8BC34A); margin-bottom: 15px; border-radius: 3px; } - .quiz-question { margin-bottom: 15px; padding: 15px; background: #f5f5f5; border-radius: 5px; } - .quiz-results { margin-top: 20px; padding: 20px; background: #e8f5e9; border-radius: 8px; } - .error-message { color: #d32f2f; background-color: #ffebee; padding: 10px; border-radius: 4px; margin: 10px 0; } - .transcript-results { border-left: 4px solid #4CAF50 !important; padding: 15px !important; background: #f8f8f8 !important; } - .error-box { border: 1px solid #ff4444 !important; background: #fff8f8 !important; } - .metric-box { background-color: white; border-radius: 10px; padding: 15px; margin: 10px 0; box-shadow: 0 2px 5px rgba(0,0,0,0.1); } - .recommendation { background-color: #fff8e1; padding: 10px; border-left: 4px solid #ffc107; margin: 5px 0; } + .gradio-container { + max-width: 1200px !important; + margin: 0 auto !important; + font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; + } + .tab-content { + padding: 20px !important; + border: 1px solid #e0e0e0 !important; + border-radius: 8px !important; + margin-top: 10px !important; + background-color: white; + box-shadow: 0 2px 4px rgba(0,0,0,0.05); + } + .completed-tab { + background: #4CAF50 !important; + color: white !important; + font-weight: bold; + } + .incomplete-tab { + background: #E0E0E0 !important; + color: #616161; + } + .nav-message { + padding: 12px; + margin: 10px 0; + border-radius: 6px; + background-color: #ffebee; + color: #c62828; + border-left: 4px solid #c62828; + } + .file-upload { + border: 2px dashed #4CAF50 !important; + padding: 25px !important; + border-radius: 8px !important; + text-align: center; + background-color: #f8f8f8; + } + .file-upload:hover { + background: #f1f8e9; + } + .progress-bar { + height: 6px; + background: linear-gradient(to right, #4CAF50, #8BC34A); + margin-bottom: 15px; + border-radius: 3px; + box-shadow: inset 0 1px 2px rgba(0,0,0,0.1); + } + .quiz-question { + margin-bottom: 15px; + padding: 15px; + background: #f5f5f5; + border-radius: 5px; + border-left: 4px solid #2196F3; + } + .quiz-results { + margin-top: 20px; + padding: 20px; + background: #e8f5e9; + border-radius: 8px; + border-left: 4px solid #4CAF50; + } + .error-message { + color: #d32f2f; + background-color: #ffebee; + padding: 12px; + border-radius: 6px; + margin: 10px 0; + border-left: 4px solid #d32f2f; + } + .transcript-results { + border-left: 4px solid #4CAF50 !important; + padding: 15px !important; + background: #f8f8f8 !important; + border-radius: 4px; + } + .error-box { + border: 1px solid #ff4444 !important; + background: #fff8f8 !important; + border-radius: 4px; + } + .metric-box { + background-color: white; + border-radius: 10px; + padding: 15px; + margin: 10px 0; + box-shadow: 0 2px 5px rgba(0,0,0,0.1); + border-left: 4px solid #2196F3; + } + .recommendation { + background-color: #fff8e1; + padding: 10px; + border-left: 4px solid #ffc107; + margin: 5px 0; + border-radius: 4px; + } + .goal-card { + background-color: white; + border-radius: 8px; + padding: 15px; + margin: 10px 0; + box-shadow: 0 1px 3px rgba(0,0,0,0.1); + border-left: 4px solid #4CAF50; + } + .calendar-event { + background-color: #e3f2fd; + border-radius: 6px; + padding: 10px; + margin: 5px 0; + border-left: 4px solid #2196F3; + } - .dark .tab-content { background-color: #2d2d2d !important; border-color: #444 !important; } - .dark .quiz-question { background-color: #3d3d3d !important; } - .dark .quiz-results { background-color: #2e3d2e !important; } - .dark textarea, .dark input { background-color: #333 !important; color: #eee !important; } - .dark .output-markdown { color: #eee !important; } - .dark .chatbot { background-color: #333 !important; } - .dark .chatbot .user, .dark .chatbot .assistant { color: #eee !important; } - .dark .metric-box { background-color: #333 !important; } + /* Dark mode styles */ + .dark .tab-content { + background-color: #2d2d2d !important; + border-color: #444 !important; + color: #eee !important; + } + .dark .quiz-question { + background-color: #3d3d3d !important; + color: #eee !important; + } + .dark .quiz-results { + background-color: #2e3d2e !important; + color: #eee !important; + } + .dark textarea, .dark input { + background-color: #333 !important; + color: #eee !important; + border-color: #555 !important; + } + .dark .output-markdown { + color: #eee !important; + } + .dark .chatbot { + background-color: #333 !important; + } + .dark .chatbot .user, .dark .chatbot .assistant { + color: #eee !important; + } + .dark .metric-box { + background-color: #333 !important; + color: #eee !important; + } + .dark .goal-card { + background-color: #333; + color: #eee; + } + .dark .calendar-event { + background-color: #1a3d5c; + color: #eee; + } """ - # Header + # Header with improved layout with gr.Row(): with gr.Column(scale=4): gr.Markdown(""" - # Student Learning Assistant + # šŸ“š Student Learning Assistant **Your personalized education companion** - Complete each step to get customized learning recommendations. + Complete each step to get customized learning recommendations and academic planning. """) with gr.Column(scale=1): dark_mode = gr.Checkbox(label="Dark Mode", value=False) - # Navigation buttons + # Navigation buttons with icons with gr.Row(): with gr.Column(scale=1, min_width=100): - step1 = gr.Button("1. Transcript", elem_classes="incomplete-tab") + step1 = gr.Button("šŸ“„ 1. Transcript", elem_classes="incomplete-tab") + with gr.Column(scale=1, min_width=100): + step2 = gr.Button("šŸ“ 2. Quiz", elem_classes="incomplete-tab", interactive=False) with gr.Column(scale=1, min_width=100): - step2 = gr.Button("2. Quiz", elem_classes="incomplete-tab", interactive=False) + step3 = gr.Button("šŸ‘¤ 3. Profile", elem_classes="incomplete-tab", interactive=False) with gr.Column(scale=1, min_width=100): - step3 = gr.Button("3. Profile", elem_classes="incomplete-tab", interactive=False) + step4 = gr.Button("šŸ” 4. Review", elem_classes="incomplete-tab", interactive=False) with gr.Column(scale=1, min_width=100): - step4 = gr.Button("4. Review", elem_classes="incomplete-tab", interactive=False) + step5 = gr.Button("šŸ’¬ 5. Assistant", elem_classes="incomplete-tab", interactive=False) with gr.Column(scale=1, min_width=100): - step5 = gr.Button("5. Assistant", elem_classes="incomplete-tab", interactive=False) + step6 = gr.Button("šŸŽÆ 6. Goals", elem_classes="incomplete-tab", interactive=False) nav_message = gr.HTML(visible=False) @@ -1446,7 +2493,7 @@ def create_interface(): with gr.Tab("Transcript", id=0): with gr.Row(): with gr.Column(scale=1): - gr.Markdown("### Step 1: Upload Your Transcript") + gr.Markdown("### šŸ“„ Step 1: Upload Your Transcript") with gr.Group(elem_classes="file-upload"): file_input = gr.File( label="Drag and drop your transcript here (PDF or Image)", @@ -1466,6 +2513,9 @@ def create_interface(): with gr.Row(): gpa_viz = gr.Plot(label="GPA Visualization", visible=False) req_viz = gr.Plot(label="Requirements Visualization", visible=False) + with gr.Row(): + credits_viz = gr.Plot(label="Credits Distribution", visible=False) + rigor_viz = gr.Plot(label="Course Rigor", visible=False) transcript_data = gr.State() file_input.change( @@ -1479,21 +2529,69 @@ def create_interface(): ) def process_and_visualize(file_obj, tab_status): - results, data = parse_transcript(file_obj) - - # Update visualizations - gpa_viz_update = gr.update(visible=data.get('analysis', {}).get('visualizations', {}).get('gpa_viz') is not None) - req_viz_update = gr.update(visible=data.get('analysis', {}).get('visualizations', {}).get('req_viz') is not None) - - # Update tab completion status - tab_status[0] = True - - return results, data, gpa_viz_update, req_viz_update, tab_status + try: + # Parse transcript with enhanced parser + parsed_data = transcript_parser.parse_transcript(file_obj.name, os.path.splitext(file_obj.name)[1].lower()) + + # Generate analyses + gpa_analysis = academic_analyzer.analyze_gpa(parsed_data) + grad_status = academic_analyzer.analyze_graduation_status(parsed_data) + college_recs = academic_analyzer.generate_college_recommendations(parsed_data) + + # Format results + results = [ + f"## šŸ“Š GPA Analysis", + f"**Rating:** {gpa_analysis['rating']}", + f"{gpa_analysis['description']}", + f"{gpa_analysis['comparison']}", + "", + f"## šŸŽ“ Graduation Status", + grad_status['status'], + f"**Completion:** {grad_status['completion_percentage']:.1f}%", + "", + f"## šŸ« College Recommendations" + ] + + if college_recs['reach']: + results.append("\n**Reach Schools:**") + results.extend([f"- {school}" for school in college_recs['reach'][:3]]) + + if college_recs['target']: + results.append("\n**Target Schools:**") + results.extend([f"- {school}" for school in college_recs['target'][:3]]) + + if college_recs['safety']: + results.append("\n**Safety Schools:**") + results.extend([f"- {school}" for school in college_recs['safety'][:3]]) + + # Store all analysis results + parsed_data['analysis'] = { + 'gpa_analysis': gpa_analysis, + 'grad_status': grad_status, + 'college_recs': college_recs + } + + # Update visualizations + viz_updates = [ + gr.update(visible=data_visualizer.create_gpa_visualization(parsed_data) is not None), + gr.update(visible=data_visualizer.create_requirements_visualization(parsed_data) is not None), + gr.update(visible=data_visualizer.create_credits_distribution_visualization(parsed_data) is not None), + gr.update(visible=data_visualizer.create_course_rigor_visualization(parsed_data) is not None) + ] + + # Update tab completion status + tab_status[0] = True + + return "\n".join(results), parsed_data, *viz_updates, tab_status + except Exception as e: + error_msg = f"Error processing transcript: {str(e)}" + logger.error(error_msg) + raise gr.Error(f"{error_msg}\n\nPossible solutions:\n1. Try a different file format\n2. Ensure text is clear and not handwritten\n3. Check file size (<{MAX_FILE_SIZE_MB}MB)") upload_btn.click( fn=process_and_visualize, inputs=[file_input, tab_completed], - outputs=[transcript_output, transcript_data, gpa_viz, req_viz, tab_completed] + outputs=[transcript_output, transcript_data, gpa_viz, req_viz, credits_viz, rigor_viz, tab_completed] ).then( fn=lambda: gr.update(elem_classes="completed-tab"), outputs=step1 @@ -1505,7 +2603,7 @@ def create_interface(): # ===== TAB 2: LEARNING STYLE QUIZ ===== with gr.Tab("Learning Style Quiz", id=1): with gr.Column(): - gr.Markdown("### Step 2: Discover Your Learning Style") + gr.Markdown("### šŸ“ Step 2: Discover Your Learning Style") progress = gr.HTML("
") quiz_components = [] @@ -1572,7 +2670,7 @@ def create_interface(): with gr.Tab("Personal Profile", id=2): with gr.Row(): with gr.Column(scale=1): - gr.Markdown("### Step 3: Tell Us About Yourself") + gr.Markdown("### šŸ‘¤ Step 3: Tell Us About Yourself") with gr.Group(): name = gr.Textbox(label="Full Name", placeholder="Your name") age = gr.Number(label="Age", minimum=MIN_AGE, maximum=MAX_AGE, precision=0) @@ -1585,7 +2683,7 @@ def create_interface(): save_confirmation = gr.HTML(visible=False) with gr.Column(scale=1): - gr.Markdown("### Favorites") + gr.Markdown("### ā¤ļø Favorites") with gr.Group(): movie = gr.Textbox(label="Favorite Movie") movie_reason = gr.Textbox(label="Why do you like it?", lines=2) @@ -1618,7 +2716,7 @@ def create_interface(): with gr.Tab("Save Profile", id=3): with gr.Row(): with gr.Column(scale=1): - gr.Markdown("### Step 4: Review & Save Your Profile") + gr.Markdown("### šŸ” Step 4: Review & Save Your Profile") with gr.Group(): load_profile_dropdown = gr.Dropdown( label="Load Existing Profile", @@ -1639,7 +2737,7 @@ def create_interface(): ) with gr.Row(): req_viz_matplotlib = gr.Plot(label="Requirements Progress", visible=False) - credits_viz = gr.Plot(label="Credits Distribution", visible=False) + credits_viz_matplotlib = gr.Plot(label="Credits Distribution", visible=False) save_btn.click( fn=profile_manager.save_profile, @@ -1651,11 +2749,11 @@ def create_interface(): outputs=output_summary ).then( fn=lambda td: ( - gr.update(visible=MATPLOTLIB_AVAILABLE and bool(td and 'requirements' in td)), - gr.update(visible=MATPLOTLIB_AVAILABLE and bool(td and 'requirements' in td)) + gr.update(visible=data_visualizer.create_requirements_visualization(td) is not None), + gr.update(visible=data_visualizer.create_credits_distribution_visualization(td) is not None) ), inputs=transcript_data, - outputs=[req_viz_matplotlib, credits_viz] + outputs=[req_viz_matplotlib, credits_viz_matplotlib] ).then( fn=lambda: {3: True}, inputs=None, @@ -1666,6 +2764,9 @@ def create_interface(): ).then( fn=lambda: gr.update(interactive=True), outputs=step5 + ).then( + fn=lambda: gr.update(interactive=True), + outputs=step6 ).then( fn=lambda: profile_manager.list_profiles(session_token.value), outputs=load_profile_dropdown @@ -1677,10 +2778,9 @@ def create_interface(): outputs=delete_btn ) - # Create visualizations when profile is loaded load_btn.click( - fn=lambda name: profile_manager.load_profile(name, session_token.value), - inputs=load_profile_dropdown, + fn=lambda: profile_manager.load_profile(load_profile_dropdown.value, session_token.value), + inputs=None, outputs=None ).then( fn=lambda profile: ( @@ -1699,8 +2799,8 @@ def create_interface(): profile.get('blog', ''), profile.get('transcript', {}), gr.update(value="Profile loaded successfully!"), - create_requirements_visualization_matplotlib(profile.get('transcript', {}).get('requirements', [])), - create_credits_distribution_visualization(profile.get('transcript', {}).get('requirements', [])) + data_visualizer.create_requirements_visualization(profile.get('transcript', {})), + data_visualizer.create_credits_distribution_visualization(profile.get('transcript', {})) ), inputs=None, outputs=[ @@ -1708,33 +2808,165 @@ def create_interface(): movie, movie_reason, show, show_reason, book, book_reason, character, character_reason, blog, transcript_data, output_summary, - req_viz_matplotlib, credits_viz + req_viz_matplotlib, credits_viz_matplotlib ] ) # ===== TAB 5: AI ASSISTANT ===== with gr.Tab("AI Assistant", id=4): - gr.Markdown("## Your Personalized Learning Assistant") + gr.Markdown("## šŸ’¬ Your Personalized Learning Assistant") gr.Markdown("Ask me anything about studying, your courses, grades, or learning strategies.") - async def chat_wrapper(message: str, history: List[List[str]]): - response = await teaching_assistant.generate_response( - message, - history, - session_token.value - ) - return response - chatbot = gr.ChatInterface( - fn=chat_wrapper, + fn=lambda msg, hist: teaching_assistant.generate_response(msg, hist, session_token.value), examples=[ "What's my GPA?", "How should I study for math?", "What courses am I taking?", - "Study tips for my learning style" + "Study tips for my learning style", + "What colleges should I consider?" ], - title="" + title="", + retry_btn=None, + undo_btn=None + ) + + # ===== TAB 6: GOALS & PLANNING ===== + with gr.Tab("Goals & Planning", id=5): + with gr.Row(): + with gr.Column(scale=1): + gr.Markdown("### šŸŽÆ Step 5: Set Academic Goals") + with gr.Group(): + goal_type = gr.Dropdown( + label="Goal Type", + choices=["GPA Improvement", "Course Completion", "Test Score", "Other"], + value="GPA Improvement" + ) + goal_description = gr.Textbox(label="Goal Description") + goal_target_date = gr.DatePicker(label="Target Date") + goal_target_value = gr.Number(label="Target Value (if applicable)", visible=False) + add_goal_btn = gr.Button("Add Goal", variant="primary") + + gr.Markdown("### šŸ“… Study Calendar") + calendar_start_date = gr.DatePicker(label="Calendar Start Date", value=datetime.date.today()) + generate_calendar_btn = gr.Button("Generate Study Calendar") + + with gr.Column(scale=2): + gr.Markdown("### Your Goals") + goals_output = gr.HTML() + goal_viz = gr.Plot(label="Goal Progress", visible=False) + + gr.Markdown("### Your Study Calendar") + calendar_output = gr.HTML() + calendar_viz = gr.Plot(label="Calendar Visualization", visible=False) + + # Show/hide target value based on goal type + goal_type.change( + fn=lambda gt: gr.update(visible=gt in ["GPA Improvement", "Test Score"]), + inputs=goal_type, + outputs=goal_target_value + ) + + # Add goal functionality + add_goal_btn.click( + fn=lambda gt, desc, date, val: ( + goal_tracker.add_goal(name.value, gt, desc, date.isoformat(), val), + update_goals_display(name.value) + ), + inputs=[goal_type, goal_description, goal_target_date, goal_target_value], + outputs=[goals_output, goal_viz] + ) + + # Generate calendar functionality + generate_calendar_btn.click( + fn=lambda date: ( + update_calendar_display(name.value, date) + ), + inputs=calendar_start_date, + outputs=[calendar_output, calendar_viz] ) + + def update_goals_display(profile_name): + goals = goal_tracker.get_goals(profile_name) + if not goals: + return ( + "
No goals set yet. Add your first goal above!
", + gr.update(visible=False) + ) + + goals_html = [] + for goal in goals: + progress = goal['progress'][-1]['value'] if goal['progress'] else 0 + target = goal['target_value'] if goal['target_value'] is not None else "N/A" + + goals_html.append(f""" +
+

{goal['description']}

+

Type: {goal['type']}

+

Target Date: {goal['target_date']}

+

Progress: {progress} / {target}

+ {f"

Last Note: {goal['progress'][-1]['notes']}

" if goal['progress'] else ""} +
+ """) + + return ( + "\n".join(goals_html), + gr.update(visible=goal_tracker.create_goal_visualization(goals) is not None) + ) + + def update_calendar_display(profile_name, start_date): + profile = profile_manager.load_profile(profile_name, session_token.value) + if not profile: + return ( + "
Please complete and save your profile first
", + gr.update(visible=False) + ) + + calendar = study_calendar.generate_study_calendar(profile, start_date.isoformat()) + + # Create HTML display + calendar_html = [] + current_date = datetime.date.fromisoformat(calendar['start_date']) + end_date = datetime.date.fromisoformat(calendar['end_date']) + + while current_date <= end_date: + day_events = [ + e for e in calendar['events'] + if datetime.date.fromisoformat(e['date']) == current_date + ] + + day_exams = [ + e for e in calendar['exams'] + if datetime.date.fromisoformat(e['date']) == current_date + ] + + if day_events or day_exams: + calendar_html.append(f"

{current_date.strftime('%A, %B %d')}

") + + for event in day_events: + calendar_html.append(f""" +
+

šŸ“š {event['title']}

+

ā±ļø {event['duration']}

+

{event['description']}

+
+ """) + + for exam in day_exams: + calendar_html.append(f""" +
+

šŸ“ {exam['title']}

+

ā° All day

+

Prepare by reviewing materials and practicing problems

+
+ """) + + current_date += datetime.timedelta(days=1) + + return ( + "\n".join(calendar_html) if calendar_html else "
No study sessions scheduled yet
", + gr.update(visible=study_calendar.create_calendar_visualization(calendar) is not None) + ) # Navigation logic def navigate_to_tab(tab_index: int, tab_completed_status): @@ -1750,7 +2982,8 @@ def create_interface(): "Please complete the transcript analysis first", "Please complete the learning style quiz first", "Please fill out your personal information first", - "Please save your profile first" + "Please save your profile first", + "Please complete the previous steps first" ] return ( gr.Tabs(selected=i), @@ -1787,6 +3020,11 @@ def create_interface(): inputs=[gr.State(4), tab_completed], outputs=[tabs, nav_message] ) + step6.click( + lambda idx, status: navigate_to_tab(idx, status), + inputs=[gr.State(5), tab_completed], + outputs=[tabs, nav_message] + ) # Dark mode toggle def toggle_dark_mode(dark): @@ -1803,8 +3041,8 @@ def create_interface(): return app -app = create_interface() +app = create_enhanced_interface() if __name__ == "__main__": - app.launch() + app.launch(server_name="0.0.0.0", server_port=7860) \ No newline at end of file