# NOTE: hosting-page banner captured with this file (not program text):
#   "Spaces: Runtime error / Runtime error"
import gradio as gr | |
import pandas as pd | |
import json | |
import os | |
import re | |
from PyPDF2 import PdfReader | |
from collections import defaultdict | |
from typing import Dict, List, Optional, Tuple, Union | |
import html | |
from pathlib import Path | |
import fitz # PyMuPDF | |
import pytesseract | |
from PIL import Image | |
import io | |
import secrets | |
import string | |
from huggingface_hub import HfApi, HfFolder | |
import torch | |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline | |
import time | |
import logging | |
import asyncio | |
from functools import lru_cache | |
import hashlib | |
from concurrent.futures import ThreadPoolExecutor | |
from pydantic import BaseModel | |
import plotly.express as px | |
import pdfplumber | |
from io import BytesIO | |
import base64 | |
import datetime | |
from cryptography.fernet import Fernet | |
import calendar | |
from dateutil.relativedelta import relativedelta | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from tqdm import tqdm | |
import random | |
# Enhanced configuration
PROFILES_DIR = "student_profiles"
ALLOWED_FILE_TYPES = [".pdf", ".png", ".jpg", ".jpeg"]
MAX_FILE_SIZE_MB = 10
MIN_AGE = 5
MAX_AGE = 120
SESSION_TOKEN_LENGTH = 32
HF_TOKEN = os.getenv("HF_TOKEN")

# Fernet key used by the module-level encryptor. An unset *or empty* variable
# falls back to a freshly generated key so the app still starts, but anything
# encrypted with that key cannot be decrypted after a restart -- so say so.
ENCRYPTION_KEY = os.getenv("ENCRYPTION_KEY")
if not ENCRYPTION_KEY:
    ENCRYPTION_KEY = Fernet.generate_key().decode()
    logging.getLogger(__name__).warning(
        "ENCRYPTION_KEY is not set; using a temporary key. "
        "Data encrypted in this session cannot be decrypted after a restart."
    )

SESSION_TIMEOUT = 3600 * 3  # presumably seconds (3 hours) -- TODO confirm
MAX_CONTEXT_HISTORY = 10
MAX_PROFILE_LOAD_ATTEMPTS = 3
# Initialize logging: INFO and above go both to a log file and to the console.
logging.basicConfig(
    handlers=[
        logging.FileHandler('transcript_parser.log'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
# Model configuration
MODEL_NAME = "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"


@lru_cache(maxsize=1)
def get_model_and_tokenizer():
    """Load the causal LM and its tokenizer once and reuse them afterwards.

    The result is memoized, so repeated calls return the same
    ``(model, tokenizer)`` pair instead of reloading the weights. A failed
    load is not cached; the next call retries.

    Returns:
        Tuple ``(model, tokenizer)`` for ``MODEL_NAME``.

    Raises:
        Exception: whatever the underlying loaders raise, after logging it.
    """
    logger.info("Loading model and tokenizer...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
        )
    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception("Failed to load model: %s", e)
        raise
    logger.info("Model and tokenizer loaded successfully")
    return model, tokenizer
# Initialize Hugging Face API
# Bind the name unconditionally so later code can test `hf_api is None`
# instead of hitting a NameError when no token is configured.
hf_api = None
if HF_TOKEN:
    for attempt in range(3):
        try:
            hf_api = HfApi(token=HF_TOKEN)
            HfFolder.save_token(HF_TOKEN)
            logger.info("Hugging Face API initialized successfully")
            break
        except Exception as e:
            logger.error(f"Attempt {attempt + 1} failed to initialize Hugging Face API: {str(e)}")
            # Exponential back-off between attempts; no point waiting after the last one.
            if attempt < 2:
                time.sleep(2 ** attempt)
class DataEncryptor:
    """String-in / string-out wrapper around a Fernet cipher."""

    def __init__(self, key: str):
        """Create the cipher from a Fernet key given as text."""
        key_bytes = key.encode()
        self.cipher = Fernet(key_bytes)

    def encrypt(self, data: str) -> str:
        """Return the Fernet token for ``data`` as text."""
        token = self.cipher.encrypt(data.encode())
        return token.decode()

    def decrypt(self, encrypted_data: str) -> str:
        """Return the plaintext for a token produced by :meth:`encrypt`."""
        plaintext = self.cipher.decrypt(encrypted_data.encode())
        return plaintext.decode()


# Shared module-level instance keyed by ENCRYPTION_KEY.
encryptor = DataEncryptor(ENCRYPTION_KEY)
def generate_session_token() -> str:
    """Return a random token of SESSION_TOKEN_LENGTH ASCII letters and digits.

    Characters are drawn with ``secrets.choice``.
    """
    pool = string.ascii_letters + string.digits
    picked = []
    for _ in range(SESSION_TOKEN_LENGTH):
        picked.append(secrets.choice(pool))
    return ''.join(picked)
def sanitize_input(text: str) -> str:
    """Strip markup and unexpected characters from user text, then HTML-escape it.

    Order matters: tags are removed and the character whitelist is applied
    *before* escaping. Escaping first would hide tags from the tag regex and
    the whitelist would then delete the ``;`` that terminates each entity.

    Args:
        text: Raw user input; ``None`` or empty is accepted.

    Returns:
        The cleaned, HTML-escaped text, or ``""`` for empty input.
    """
    if not text:
        return ""
    text = text.strip()
    text = re.sub(r'<[^>]*>', '', text)
    text = re.sub(r'[^\w\s\-.,!?@#\$%^&*()+=]', '', text)
    return html.escape(text)
def validate_name(name: str) -> str:
    """Validate a person's name and return it with surrounding whitespace removed.

    Args:
        name: The name as entered; ``None`` is treated like an empty string.

    Returns:
        The stripped name.

    Raises:
        ValueError: if the name is empty, longer than 100 characters, or
            contains a digit.
    """
    # An untouched input field can arrive as None; report it as "empty"
    # rather than failing with AttributeError on .strip().
    if name is None:
        name = ""
    name = name.strip()
    if not name:
        raise ValueError("Name cannot be empty.")
    if len(name) > 100:
        raise ValueError("Name is too long (maximum 100 characters).")
    if any(c.isdigit() for c in name):
        raise ValueError("Name cannot contain numbers.")
    return name
def validate_age(age: Union[int, float, str]) -> int:
    """Convert ``age`` to an int and check it lies within [MIN_AGE, MAX_AGE].

    Args:
        age: Age as a number or numeric string.

    Returns:
        The age as an ``int``.

    Raises:
        ValueError: "Please enter a valid age number." when the value cannot
            be converted, or the range message when it is out of bounds.
    """
    try:
        age_int = int(age)
    except (ValueError, TypeError):
        raise ValueError("Please enter a valid age number.") from None
    # The range check sits outside the try so its message is not swallowed
    # and replaced by the generic "valid age number" error.
    if not MIN_AGE <= age_int <= MAX_AGE:
        raise ValueError(f"Age must be between {MIN_AGE} and {MAX_AGE}.")
    return age_int
def validate_file(file_obj) -> None:
    """Check that an upload exists, has an allowed extension and is small enough.

    Args:
        file_obj: Upload object exposing the file path as ``.name``.

    Raises:
        ValueError: if nothing was uploaded, the extension is not in
            ALLOWED_FILE_TYPES, or the file exceeds MAX_FILE_SIZE_MB.
    """
    if not file_obj:
        raise ValueError("Please upload a file first")

    _, extension = os.path.splitext(file_obj.name)
    if extension.lower() not in ALLOWED_FILE_TYPES:
        raise ValueError(f"Invalid file type. Allowed types: {', '.join(ALLOWED_FILE_TYPES)}")

    size_in_mb = os.path.getsize(file_obj.name) / (1024 * 1024)
    if size_in_mb > MAX_FILE_SIZE_MB:
        raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
def validate_date(date_str: str) -> bool:
    """Return True when ``date_str`` parses as a ``YYYY-MM-DD`` calendar date."""
    try:
        datetime.datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        return False
    return True
def remove_sensitive_info(text: str) -> str:
    """Replace common personal identifiers in ``text`` with placeholder tags.

    Patterns are applied in order, most specific first. The broad
    two-capitalized-words NAME pattern runs last; otherwise it would consume
    the street name inside an address and the ADDRESS pattern could never
    match.

    Args:
        text: Free text; ``None`` or empty is accepted.

    Returns:
        The text with SSNs, addresses, phone numbers, long digit runs, e-mail
        addresses, IPv4-looking values and name-like word pairs replaced, or
        ``""`` for empty input.
    """
    if not text:
        return ""
    patterns = [
        (r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED-SSN]'),
        (r'\b\d{1,5} [A-Z][a-z]+ [A-Z][a-z]+, [A-Z]{2} \d{5}\b', '[ADDRESS]'),
        # Optional leading "(" so "(305) 555-1234" leaves no dangling parenthesis.
        (r'\(?\b\d{3}\) \d{3}-\d{4}\b', '[PHONE]'),
        (r'\b\d{6,9}\b', '[ID]'),
        # TLD class is letters only; the former "[A-Z|a-z]" also accepted "|".
        (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '[EMAIL]'),
        (r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', '[IP]'),
        (r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', '[NAME]'),
    ]
    for pattern, replacement in patterns:
        text = re.sub(pattern, replacement, text)
    return text
class LearningStyleQuiz:
    """Twenty-question learning-style quiz with scoring and study tips.

    Every question offers four options listed in the same order as
    ``_STYLE_ORDER`` (visual, auditory, reading/writing, kinesthetic), so an
    answer is scored by the position of the chosen option rather than by
    guessing from its wording.
    """

    # Style represented by option 0, 1, 2 and 3 of every question.
    _STYLE_ORDER = ('visual', 'auditory', 'reading/writing', 'kinesthetic')

    def __init__(self):
        """Set up the question texts, their options and the style descriptions."""
        self.questions = [
            "When learning something new, I prefer to:",
            "I remember information best when I:",
            "When giving directions, I:",
            "When I'm bored, I tend to:",
            "When learning a new skill, I prefer to:",
            "When studying, I like to:",
            "I prefer teachers who:",
            "When solving problems, I:",
            "When working on a group project, I:",
            "My ideal study environment is:",
            "When preparing for a test, I:",
            "When reading instructions, I:",
            "When explaining something to someone, I:",
            "When taking notes in class, I:",
            "When using a new device or app, I:",
            "When remembering names, I:",
            "When choosing a book to read, I:",
            "When giving a presentation, I:",
            "When organizing my work, I:",
            "When relaxing, I enjoy:"
        ]
        self.options = [
            ["See diagrams and charts", "Listen to explanations", "Read about it", "Try it out hands-on"],
            ["See pictures or diagrams", "Hear someone explain it", "Read about it", "Do something physical with it"],
            ["Draw a map", "Give verbal instructions", "Write down directions", "Demonstrate or guide physically"],
            ["Doodle or look around", "Talk to myself or others", "Read or imagine things", "Fidget or move around"],
            ["Watch demonstrations", "Listen to instructions", "Read instructions", "Jump in and try it"],
            ["Use highlighters and diagrams", "Discuss with others", "Read and take notes", "Move around or use objects"],
            ["Use visual aids", "Give interesting lectures", "Provide reading materials", "Include hands-on activities"],
            ["Draw pictures or diagrams", "Talk through options", "Make lists", "Try different solutions physically"],
            ["Create visual plans", "Discuss ideas verbally", "Write detailed plans", "Take on hands-on tasks"],
            ["Somewhere quiet with good lighting", "Somewhere I can discuss ideas", "A library with lots of resources", "Somewhere I can move around"],
            ["Create visual study aids", "Recite information aloud", "Write summaries", "Create physical models"],
            ["Look at diagrams first", "Have someone explain them", "Read them carefully", "Try to follow them as I go"],
            ["Draw diagrams or pictures", "Explain verbally", "Write detailed explanations", "Show by doing"],
            ["Draw diagrams and symbols", "Record lectures to listen later", "Write detailed notes", "Underline and highlight"],
            ["Look at the screen layout", "Listen to audio instructions", "Read the manual", "Start clicking buttons"],
            ["Remember faces better than names", "Remember names when I hear them", "Remember names when I see them written", "Remember people by activities we did"],
            ["Choose books with pictures/diagrams", "Choose audiobooks", "Choose text-heavy books", "Choose interactive books"],
            ["Use lots of visual aids", "Focus on my verbal delivery", "Provide handouts", "Use props or demonstrations"],
            ["Use color-coding systems", "Talk through my plan", "Make detailed lists", "Physically arrange materials"],
            ["Watching videos or art", "Listening to music/podcasts", "Reading", "Doing physical activities"]
        ]
        self.learning_styles = {
            'visual': "**Visual** learners prefer seeing information in charts, diagrams, and pictures.",
            'auditory': "**Auditory** learners prefer hearing information spoken and learn best through lectures and discussions.",
            'reading/writing': "**Reading/Writing** learners prefer information displayed as words and learn best through reading and note-taking.",
            'kinesthetic': "**Kinesthetic** learners prefer physical experience and learn best through hands-on activities and movement."
        }

    def _style_for_answer(self, question_index, answer):
        """Return the style an answer stands for, or None if it is not a known option.

        The option list of the answered question is checked first; if the text
        is not found there, every question's options are searched so answers
        supplied out of order are still scored.
        """
        if question_index < len(self.options):
            row = self.options[question_index]
            if answer in row:
                return self._STYLE_ORDER[row.index(answer)]
        for row in self.options:
            if answer in row:
                return self._STYLE_ORDER[row.index(answer)]
        return None

    def evaluate_quiz(self, *answers):
        """Score the answers and return a Markdown report.

        Args:
            *answers: Chosen option text for each question, in question order.

        Returns:
            Markdown with the primary style, its description, study tips and
            the two runner-up styles.

        Raises:
            gr.Error: if no answers were given or any answer is ``None``.
        """
        if not answers or any(a is None for a in answers):
            raise gr.Error("Please answer all questions before submitting")

        style_counts = dict.fromkeys(self._STYLE_ORDER, 0)
        for index, answer in enumerate(answers):
            style = self._style_for_answer(index, answer)
            if style is not None:
                style_counts[style] += 1

        # Stable sort: ties keep _STYLE_ORDER order, so the first entry is the
        # same style max() would pick.
        ranked = sorted(style_counts.items(), key=lambda item: item[1], reverse=True)
        primary_style = ranked[0][0]
        secondary_styles = ranked[1:3]

        tips = {
            'visual': [
                "- Use color coding in your notes",
                "- Create mind maps and diagrams",
                "- Watch educational videos to visualize concepts",
                "- Highlight or underline important information"
            ],
            'auditory': [
                "- Record lectures and listen to them",
                "- Explain concepts out loud to yourself",
                "- Participate in study groups",
                "- Use rhymes or songs to remember information"
            ],
            'reading/writing': [
                "- Write detailed summaries in your own words",
                "- Create question-answer sets for each topic",
                "- Rewrite your notes to reinforce learning",
                "- Read textbooks and articles on the subject"
            ],
            'kinesthetic': [
                "- Use hands-on activities when possible",
                "- Study while moving or pacing",
                "- Create physical models to represent concepts",
                "- Take frequent short breaks to move around"
            ],
        }

        result = [
            "## 🎯 Your Learning Style Results",
            f"Your primary learning style is **{primary_style.capitalize()}**",
            self.learning_styles[primary_style],
            "",
            "### Tips for Your Learning Style:"
        ]
        result.extend(tips[primary_style])
        result.extend([
            "",
            "### Secondary Learning Styles:",
            f"1. {secondary_styles[0][0].capitalize()}",
            f"2. {secondary_styles[1][0].capitalize()}"
        ])
        return "\n".join(result)


# Initialize learning style quiz
learning_style_quiz = LearningStyleQuiz()
class MiamiDadeTranscriptParser:
    """Regex-based parser for the text of a Miami-Dade transcript PDF.

    ``parse_transcript`` extracts text from the PDF and returns a dict with
    ``student_info``, ``academic_summary``, ``course_history``, ``assessments``
    and ``format`` (always ``'miami_dade_v3'``).
    """

    def __init__(self):
        """Compile the regular expressions used by the section parsers."""
        self.patterns = {
            'student_info': re.compile(
                r"LEGAL NAME:\s*([^\n]+?)\s*MAILING\s+ADDRESS:.*?"
                r"GRADE LEVEL:\s*(\d+).*?"
                r"FL STUDENT ID:\s*(\w+).*?"
                r"CURRENT SCHOOL:\s*(\d+\s+[^\n]+?)\s*\(",
                re.DOTALL
            ),
            'gpa': re.compile(
                r"DISTRICT:\s*([\d.]+).*?STATE:\s*([\d.]+)",
                re.DOTALL
            ),
            'credits': re.compile(
                r"\*\s+([A-Z\s/]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s*\*",
                re.DOTALL
            ),
            'course': re.compile(
                r"(\d)\s+(\w+)\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([A-Z])\s+([A-Z])\s+([\d.]+)\s+([\d.]+)",
                re.DOTALL
            ),
            'assessment': re.compile(
                r"ENGLISH/LANGUAGE ARTS:\s*(\d{2}/\d{4})|"
                r"ALGEBRA I ASSESSMENT REQUIREMENT MET:\s*(YES|NO)|"
                r"BIOLOGY ASSESSMENT PASSED|"
                r"DISTRICT COMM/VOL SERVICE RQMT MET:\s*(YES).*?HRS:\s*(\d+)",
                re.DOTALL
            ),
            'class_rank': re.compile(
                r"\*\s+PERCENTILE:\s*(\d+)\s*\*\s*TOTAL NUMBER IN CLASS:\s*(\d+)",
                re.DOTALL
            ),
            'course_alt': re.compile(
                r"(\d)\s+(\w+)\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([A-Z])\s+([\d.]+)\s+([\d.]+)",
                re.DOTALL
            )
        }

    @staticmethod
    def _to_float(value) -> Optional[float]:
        """Convert a captured numeric token to float; None if it is not a number.

        The ``[\\d.]+`` groups can capture tokens such as ``"."`` or ``"3.5."``.
        """
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    @staticmethod
    def _is_pdf_syntax_error(exc: Exception) -> bool:
        """Tell whether ``exc`` is a PDF syntax error from the PDF backend.

        NOTE(review): it is unclear whether pdfplumber exposes
        ``PDFSyntaxError`` at package level. Referencing a missing attribute in
        an ``except`` clause would raise AttributeError and hide the real
        error, so the class is looked up defensively and matched by name too.
        """
        exported = getattr(pdfplumber, "PDFSyntaxError", None)
        if exported is not None and isinstance(exc, exported):
            return True
        return type(exc).__name__ == "PDFSyntaxError"

    @staticmethod
    def _extract_with_pdfplumber(file_path: str) -> str:
        """Return the text of every page, one page per line block, via pdfplumber."""
        parts = []
        with pdfplumber.open(file_path) as pdf:
            with tqdm(total=len(pdf.pages), desc="Processing transcript") as pbar:
                for page in pdf.pages:
                    # A page without a text layer can yield None.
                    parts.append((page.extract_text() or "") + "\n")
                    pbar.update(1)
        return "".join(parts)

    @staticmethod
    def _extract_with_pymupdf(file_path: str) -> str:
        """Return the text of every page via PyMuPDF, closing the document afterwards."""
        with fitz.open(file_path) as doc:
            return "".join(page.get_text() for page in doc)

    def parse_transcript(self, file_path: str) -> Dict:
        """Parse Miami-Dade transcript PDF with multiple extraction methods.

        pdfplumber is tried first; when it yields fewer than 500 characters
        the text is re-extracted with PyMuPDF.

        Args:
            file_path: Path of the transcript PDF.

        Returns:
            The parsed transcript dict (see the class docstring).

        Raises:
            ValueError: for an invalid PDF, an extraction failure, or when no
                student info / courses could be parsed. The original exception
                is attached as ``__cause__``.
        """
        try:
            text = self._extract_with_pdfplumber(file_path)
            # Fallback to PyMuPDF if text extraction is poor
            if len(text) < 500:
                logger.warning("Low text extraction with pdfplumber, trying PyMuPDF")
                text = self._extract_with_pymupdf(file_path)
            return self._parse_miami_dade_format(text)
        except Exception as e:
            if self._is_pdf_syntax_error(e):
                error_msg = "Invalid PDF file. Please ensure you're uploading a valid transcript PDF."
                logger.error(f"{error_msg}: {str(e)}")
                raise ValueError(f"{error_msg} If the problem persists, try converting the file to a different format.") from e
            logger.error(f"Error parsing transcript: {str(e)}")
            raise ValueError(f"Error processing transcript: {str(e)}") from e

    def _parse_miami_dade_format(self, text: str) -> Dict:
        """Run all section parsers over ``text`` and assemble the result.

        Raises:
            ValueError: if student info or course history came back empty.
        """
        parsed_data = {
            'student_info': self._parse_student_info(text),
            'academic_summary': self._parse_academic_summary(text),
            'course_history': self._parse_courses(text),
            'assessments': self._parse_assessments(text),
            'format': 'miami_dade_v3'
        }
        # Validate we got at least some data
        if not parsed_data['student_info'] or not parsed_data['course_history']:
            raise ValueError("Incomplete data extracted from transcript")
        return parsed_data

    def _parse_student_info(self, text: str) -> Dict:
        """Extract name, grade, student id, school, birth date and ethnicity.

        Returns an empty dict when the student header is not found.
        """
        match = self.patterns['student_info'].search(text)
        if not match:
            return {}
        # All four groups are mandatory in the pattern, so a match has them all.
        name, grade, student_id, school = match.groups()
        return {
            'name': name.strip(),
            'grade': grade,
            'student_id': student_id,
            'school': school.strip(),
            'birth_date': self._extract_birth_date(text),
            'ethnicity': self._extract_ethnicity(text)
        }

    def _extract_birth_date(self, text: str) -> Optional[str]:
        """Return the ``MM/DD/YYYY`` value after "BIRTH DATE:", or None."""
        birth_match = re.search(r"BIRTH DATE:\s*(\d{2}/\d{2}/\d{4})", text)
        return birth_match.group(1) if birth_match else None

    def _extract_ethnicity(self, text: str) -> Optional[str]:
        """Return the rest of the line after "ETHNICITY:", or None."""
        eth_match = re.search(r"ETHNICITY:\s*([^\n]+)", text)
        return eth_match.group(1).strip() if eth_match else None

    def _parse_academic_summary(self, text: str) -> Dict:
        """Extract GPA, per-subject credits and class rank.

        Numeric tokens that cannot be converted are stored as None instead of
        aborting the parse.
        """
        summary = {
            'gpa': {'district': None, 'state': None},
            'credits': {},
            'class_rank': {'percentile': None, 'class_size': None}
        }
        # GPA
        gpa_match = self.patterns['gpa'].search(text)
        if gpa_match:
            district = self._to_float(gpa_match.group(1))
            state = self._to_float(gpa_match.group(2))
            summary['gpa']['district'] = district
            summary['gpa']['state'] = state if state is not None else district
        # Credits
        for match in self.patterns['credits'].finditer(text):
            subject = match.group(1).strip()
            summary['credits'][subject] = {
                'earned': self._to_float(match.group(2)),
                'required': self._to_float(match.group(3)),
                'remaining': self._to_float(match.group(4))
            }
        # Class Rank
        rank_match = self.patterns['class_rank'].search(text)
        if rank_match:
            summary['class_rank']['percentile'] = int(rank_match.group(1))
            summary['class_rank']['class_size'] = int(rank_match.group(2))
        return summary

    def _parse_courses(self, text: str) -> List[Dict]:
        """Extract course rows; rows with unparseable credit values are skipped.

        The primary pattern (three single-letter columns) is tried first. Only
        when it finds nothing is the alternative pattern (two single-letter
        columns) used.
        """
        courses = []
        # Try primary pattern first
        for match in self.patterns['course'].finditer(text):
            attempted = self._to_float(match.group(8))
            earned = self._to_float(match.group(9))
            if attempted is None or earned is None:
                continue
            courses.append({
                'term': match.group(1),
                'course_code': match.group(2),
                'course_title': match.group(3).strip(),
                'subject_area': match.group(4),
                'grade': match.group(5),
                'flag': match.group(6),
                'credit_status': match.group(7),
                'credit_attempted': attempted,
                'credit_earned': earned
            })
        # If no courses found, try alternative pattern
        if not courses:
            for match in self.patterns['course_alt'].finditer(text):
                # course_alt has 8 groups: the numeric credit columns are
                # groups 7 and 8 (group 6 is the second single-letter column,
                # whose meaning is not evident here and is left unlabelled).
                attempted = self._to_float(match.group(7))
                earned = self._to_float(match.group(8))
                if attempted is None or earned is None:
                    continue
                courses.append({
                    'term': match.group(1),
                    'course_code': match.group(2),
                    'course_title': match.group(3).strip(),
                    'subject_area': match.group(4),
                    'grade': match.group(5),
                    'credit_attempted': attempted,
                    'credit_earned': earned
                })
        return courses

    def _parse_assessments(self, text: str) -> Dict:
        """Extract assessment results and the community-service requirement."""
        assessments = {
            'ela_passed_date': None,
            'algebra_passed': False,
            'biology_passed': False,
            'community_service': {
                'met': False,
                'hours': 0
            }
        }
        for match in self.patterns['assessment'].finditer(text):
            matched_text = match.group(0)
            if match.group(1):  # ELA date
                assessments['ela_passed_date'] = match.group(1)
            elif match.group(2):  # Algebra
                assessments['algebra_passed'] = match.group(2) == "YES"
            elif "BIOLOGY" in matched_text:
                assessments['biology_passed'] = True
            elif "SERVICE" in matched_text:
                assessments['community_service'] = {
                    'met': True,
                    'hours': int(match.group(4)) if match.group(4) else 0
                }
        return assessments


# Initialize the parser
transcript_parser = MiamiDadeTranscriptParser()
class AcademicAnalyzer: | |
def __init__(self):
    """Set up the letter-grade point scale and the per-tier college thresholds."""
    grade_points = (
        ('A', 4.0), ('A-', 3.7), ('B+', 3.3), ('B', 3.0), ('B-', 2.7),
        ('C+', 2.3), ('C', 2.0), ('C-', 1.7), ('D+', 1.3), ('D', 1.0), ('F', 0.0),
    )
    self.gpa_scale = dict(grade_points)

    # (tier, minimum GPA, rigor threshold, service threshold)
    tier_rows = (
        ('ivy_league', 4.3, 8, 100),
        ('top_tier', 4.0, 6, 80),
        ('competitive', 3.7, 4, 60),
        ('good', 3.3, 2, 40),
        ('average', 2.7, 1, 20),
    )
    self.college_tiers = {
        tier: {'gpa': gpa, 'rigor': rigor, 'service': service}
        for tier, gpa, rigor, service in tier_rows
    }
def analyze_gpa(self, parsed_data: Dict) -> Dict:
    """Rate the student's weighted GPA and suggest improvements.

    The GPA is read from ``student_info`` for the ``'progress_summary'``
    format, from ``academic_summary['gpa']`` (district = weighted, state =
    unweighted) for ``'miami_dade_v3'``, and from ``academic_summary['gpa']``
    as a plain number otherwise.

    Args:
        parsed_data: Parsed transcript dict.

    Returns:
        Dict with ``rating``, ``description``, ``comparison`` and
        ``improvement_tips``. When no usable weighted GPA is present the
        rating is ``'Unknown'`` for every format.
    """
    unknown = {
        'rating': 'Unknown',
        'description': 'Could not analyze GPA - data may be missing or incomplete',
        'comparison': 'Please verify your transcript contains GPA information',
        'improvement_tips': [
            "Check that your transcript includes GPA information",
            "Ensure the file is clear and all text was extracted properly"
        ]
    }
    try:
        # Handle multiple transcript formats
        transcript_format = parsed_data.get('format')
        if transcript_format == 'progress_summary':
            info = parsed_data.get('student_info', {})
            weighted_gpa = float(info.get('weighted_gpa') or 0)
            unweighted_gpa = float(info.get('unweighted_gpa') or 0)
        elif transcript_format == 'miami_dade_v3':
            gpa = parsed_data.get('academic_summary', {}).get('gpa', {})
            weighted_gpa = float(gpa.get('district') or 0)
            unweighted_gpa = float(gpa.get('state') or 0)
        else:  # Alternative format
            weighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa') or 0)
            unweighted_gpa = weighted_gpa  # Assume same if not specified
    except (AttributeError, TypeError, ValueError) as e:
        logger.error(f"GPA analysis error: {str(e)}")
        return unknown

    # A missing GPA defaults to 0; that is "no data", not a below-average student.
    if weighted_gpa <= 0:
        return unknown

    # (minimum weighted GPA, rating, description, comparison, tips), best first.
    # NOTE(review): the leading glyphs of the first, second and last
    # descriptions look like mis-decoded emoji whose originals cannot be
    # determined here; they are kept as found.
    tiers = (
        (4.5, 'Excellent',
         "π You're in the top tier of students with a highly competitive GPA.",
         "This puts you in the top 5% of students nationally.",
         ["Consider taking advanced courses to challenge yourself",
          "Look into college-level courses or research opportunities"]),
        (4.0, 'Strong',
         "π Your GPA is strong and competitive for most colleges.",
         "This is above the national average and competitive for many universities.",
         ["Maintain your current study habits",
          "Consider adding 1-2 more challenging courses"]),
        (3.5, 'Good',
         "ℹ️ Your GPA is good but could be improved for more competitive schools.",
         "This is slightly above the national average.",
         ["Focus on improving in your weaker subjects",
          "Consider getting tutoring for challenging courses",
          "Develop better study habits and time management"]),
        (3.0, 'Average',
         "⚠️ Your GPA is average. Focus on improvement for better college options.",
         "This is around the national average.",
         ["Identify your weakest subjects and focus on them",
          "Develop a consistent study schedule",
          "Seek help from teachers or tutors",
          "Consider retaking courses with low grades if possible"]),
    )
    below_average = (
        'Below Average',
        "β Your GPA is below average. Please consult with your academic advisor.",
        "This is below the national average and may limit college options.",
        ["Meet with your school counselor immediately",
         "Develop a structured improvement plan",
         "Consider summer school or credit recovery options",
         "Focus on fundamental study skills"],
    )
    rating, description, comparison, tips = next(
        (tier[1:] for tier in tiers if weighted_gpa >= tier[0]),
        below_average,
    )
    analysis = {
        'rating': rating,
        'description': description,
        'comparison': comparison,
        'improvement_tips': list(tips)
    }

    # The weighted/unweighted gap is only meaningful when both values exist.
    if unweighted_gpa > 0:
        diff = weighted_gpa - unweighted_gpa
        if diff > 0.5:
            analysis['comparison'] += "\n\nThe significant difference between your weighted and unweighted GPA suggests you're taking many advanced courses."
        elif diff > 0.2:
            analysis['comparison'] += "\n\nThe moderate difference between your weighted and unweighted GPA suggests a good balance of standard and advanced courses."
        else:
            analysis['comparison'] += "\n\nThe small difference between your weighted and unweighted GPA suggests you might benefit from more challenging courses."
    return analysis
def analyze_graduation_status(self, parsed_data: Dict) -> Dict:
    """Summarize progress towards graduation from credit/requirement totals.

    For the ``'progress_summary'`` format the totals come from
    ``parsed_data['requirements']`` (``required`` / ``completed`` per code);
    otherwise from ``parsed_data['academic_summary']['credits']``
    (``required`` / ``earned`` per subject). Non-numeric values count as 0.

    Args:
        parsed_data: Parsed transcript dict.

    Returns:
        Dict with ``status``, ``completion_percentage``,
        ``missing_requirements``, ``on_track`` and ``timeline``. ``timeline``
        is filled only when ``student_info['year_of_graduation']`` is a future
        year and credits remain. On an unexpected error a fallback dict with
        0% completion is returned.
    """
    def _num(value) -> float:
        """Best-effort float conversion; missing or non-numeric values become 0.0."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    analysis = {
        'status': '',
        'completion_percentage': 0,
        'missing_requirements': [],
        'on_track': False,
        'timeline': ''
    }
    try:
        if parsed_data.get('format') == 'progress_summary':
            requirements = {
                code: req
                for code, req in (parsed_data.get('requirements') or {}).items()
                if req
            }
            total_required = sum(_num(req.get('required')) for req in requirements.values())
            total_earned = sum(_num(req.get('completed')) for req in requirements.values())
            analysis['missing_requirements'] = [
                {
                    'code': code,
                    'description': req.get('description', ''),
                    'remaining': max(0, _num(req.get('required')) - _num(req.get('completed'))),
                    'status': req.get('status', '')
                }
                for code, req in requirements.items()
                if _num(req.get('completed')) < _num(req.get('required'))
            ]
        else:
            credits = {
                subject: info
                for subject, info in (parsed_data.get('academic_summary', {}).get('credits') or {}).items()
                if info
            }
            total_required = sum(_num(info.get('required')) for info in credits.values())
            total_earned = sum(_num(info.get('earned')) for info in credits.values())
            analysis['missing_requirements'] = [
                {
                    'subject': subject,
                    'earned': _num(info.get('earned')),
                    'required': _num(info.get('required')),
                    'remaining': max(0, _num(info.get('required')) - _num(info.get('earned')))
                }
                for subject, info in credits.items()
                if _num(info.get('required')) > _num(info.get('earned'))
            ]

        percentage = (total_earned / total_required) * 100 if total_required > 0 else 0
        analysis['completion_percentage'] = percentage

        # NOTE(review): apart from the repaired warning sign, the leading
        # glyphs below look like mis-decoded emoji whose originals cannot be
        # determined here; they are kept as found.
        if percentage >= 100:
            analysis['status'] = "π Congratulations! You've met all graduation requirements."
            analysis['on_track'] = True
        elif percentage >= 90:
            analysis['status'] = f"β You've completed {percentage:.1f}% of requirements. Almost there!"
            analysis['on_track'] = True
        elif percentage >= 75:
            analysis['status'] = f"π You've completed {percentage:.1f}% of requirements. Keep working!"
            analysis['on_track'] = True
        elif percentage >= 50:
            analysis['status'] = f"⚠️ You've completed {percentage:.1f}% of requirements. Please meet with your counselor."
            analysis['on_track'] = False
        else:
            analysis['status'] = f"β You've only completed {percentage:.1f}% of requirements. Immediate action needed."
            analysis['on_track'] = False

        grad_year = parsed_data.get('student_info', {}).get('year_of_graduation', '')
        if grad_year:
            try:
                # Years left is simply graduation year minus the current year.
                years_remaining = int(grad_year) - datetime.datetime.now().year
            except (TypeError, ValueError):
                years_remaining = 0
            remaining_credits = max(0.0, total_required - total_earned)
            if years_remaining > 0 and remaining_credits > 0:
                credits_per_year = remaining_credits / years_remaining
                analysis['timeline'] = (
                    f"To graduate on time in {grad_year}, you need to complete approximately "
                    f"{credits_per_year:.1f} credits per year."
                )
        return analysis
    except Exception as e:
        logger.error(f"Graduation status error: {str(e)}")
        return {
            'status': 'Could not analyze graduation status - data may be incomplete',
            'completion_percentage': 0,
            'missing_requirements': [],
            'on_track': False,
            'timeline': 'Please verify your transcript contains credit information'
        }
def analyze_course_rigor(self, parsed_data: Dict) -> Dict:
    """Count advanced courses and rate the rigor of the course history.

    Each course is classified from its title (``description`` or
    ``course_title``) as AP, IB, dual enrollment or honors, in that order of
    precedence. Abbreviations are matched as whole words so that titles such
    as "GRAPHIC DESIGN" or "CARIBBEAN LIT" are not counted. A ``flag`` of
    ``'H'`` also marks a course as honors.

    Args:
        parsed_data: Parsed transcript dict with a ``course_history`` list.

    Returns:
        Dict with ``advanced_courses``, ``honors_courses``, ``ap_courses``,
        ``ib_courses``, ``de_courses``, ``rating`` and ``recommendations``.
        With no courses the counts are 0 and the rating is empty; on an
        unexpected error the rating is ``'Unknown'``.
    """
    analysis = {
        'advanced_courses': 0,
        'honors_courses': 0,
        'ap_courses': 0,
        'ib_courses': 0,
        'de_courses': 0,
        'rating': '',
        'recommendations': []
    }
    # Checked in order; the first matching category wins.
    categories = (
        ('ap_courses', re.compile(r'\bAP\b|ADVANCED PLACEMENT')),
        ('ib_courses', re.compile(r'\bIB\b|INTERNATIONAL BACCALAUREATE')),
        ('de_courses', re.compile(r'\bDE\b|DUAL ENROLLMENT|\bCOLLEGE\b')),
    )
    try:
        courses = parsed_data.get('course_history', [])
        for course in courses:
            course_title = (course.get('description') or course.get('course_title') or '').upper()
            for counter, pattern in categories:
                if pattern.search(course_title):
                    analysis[counter] += 1
                    analysis['advanced_courses'] += 1
                    break
            else:
                if 'HONORS' in course_title or course.get('flag', '') == 'H':
                    analysis['honors_courses'] += 1
                    analysis['advanced_courses'] += 1

        total_courses = len(courses)
        if total_courses == 0:
            return analysis

        advanced_percentage = (analysis['advanced_courses'] / total_courses) * 100
        if advanced_percentage >= 50:
            analysis['rating'] = 'Very High Rigor'
            analysis['recommendations'] = [
                "Your course rigor is excellent for college admissions",
                "Consider adding 1-2 more advanced courses if manageable"
            ]
        elif advanced_percentage >= 30:
            analysis['rating'] = 'High Rigor'
            analysis['recommendations'] = [
                "Your course rigor is strong",
                "Consider adding 1-2 more advanced courses next year"
            ]
        elif advanced_percentage >= 15:
            analysis['rating'] = 'Moderate Rigor'
            analysis['recommendations'] = [
                "Your course rigor is average",
                "Consider adding more advanced courses to strengthen your profile"
            ]
        else:
            analysis['rating'] = 'Low Rigor'
            analysis['recommendations'] = [
                "Your course rigor is below average for college-bound students",
                "Strongly consider adding advanced courses next semester",
                "Meet with your counselor to discuss options"
            ]
        return analysis
    except Exception as e:
        logger.error(f"Course rigor error: {str(e)}")
        return {
            'advanced_courses': 0,
            'honors_courses': 0,
            'ap_courses': 0,
            'ib_courses': 0,
            'de_courses': 0,
            'rating': 'Unknown',
            'recommendations': [
                "Could not analyze course rigor - verify your transcript contains course information",
                "Check that course titles and types were properly extracted"
            ]
        }
def generate_college_recommendations(self, parsed_data: Dict) -> Dict:
    """Suggest college tiers, scholarships and improvement areas for a student.

    The weighted GPA and community-service hours are read from
    ``student_info`` when ``parsed_data['format'] == 'progress_summary'``,
    otherwise from ``academic_summary.gpa.district`` and
    ``assessments.community_service.hours``. The advanced-course count comes
    from ``self.analyze_course_rigor(parsed_data)``.

    Args:
        parsed_data: Parsed transcript dictionary.

    Returns:
        A dict with list values under ``reach``, ``target``, ``safety``,
        ``scholarships`` and ``improvement_areas``. If anything raises, the
        error is logged and a fallback dict asking for more data is returned.
    """

    def _number(value) -> float:
        # A present-but-empty value (None / "") counts as zero, exactly like an
        # absent key does. Anything else that cannot be parsed still raises
        # and ends up in the "insufficient data" fallback below.
        if value is None or value == "":
            return 0.0
        return float(value)

    # (min GPA, min advanced courses, min service hours, lists to extend).
    # A tier applies only when all three thresholds are met; first match wins.
    admission_tiers = (
        (4.3, 8, 100, {
            'reach': [
                "Ivy League: Harvard, Yale, Princeton, Columbia, etc.",
                "Stanford, MIT, CalTech, University of Chicago",
            ],
            'target': [
                "Top Public Universities: UCLA, UC Berkeley, UMich, UVA",
                "Elite Liberal Arts: Williams, Amherst, Swarthmore",
            ],
        }),
        (4.0, 6, 80, {
            'reach': [
                "Top 20 National Universities",
                "Highly Selective Liberal Arts Colleges",
            ],
            'target': [
                "Top 50 National Universities",
                "Selective Public Flagships",
                "Top Liberal Arts Colleges",
            ],
        }),
        (3.7, 4, 60, {
            'reach': [
                "Top 50 National Universities",
                "Selective Liberal Arts Colleges",
            ],
            'target': [
                "State Flagship Universities",
                "Good Regional Universities",
            ],
        }),
        (3.3, 2, 40, {
            'target': [
                "State Universities",
                "Many Private Colleges",
            ],
            'safety': [
                "Less Selective Private Colleges",
                "Community Colleges with Transfer Programs",
            ],
        }),
    )
    lowest_tier = {
        'target': [
            "Open Admission Colleges",
            "Some State Universities",
        ],
        'safety': [
            "Community Colleges",
            "Technical Schools",
        ],
    }
    # (min GPA, scholarships); first match wins, no match adds nothing.
    scholarship_tiers = (
        (4.0, [
            "National Merit Scholarship",
            "Presidential Scholarships",
            "College-Specific Full-Ride Scholarships",
        ]),
        (3.7, [
            "Bright Futures (Florida)",
            "State-Specific Merit Scholarships",
            "Honors College Scholarships",
        ]),
        (3.3, [
            "Local Community Scholarships",
            "Special Interest Scholarships",
            "First-Generation Student Programs",
        ]),
    )

    recommendations = {
        'reach': [],
        'target': [],
        'safety': [],
        'scholarships': [],
        'improvement_areas': []
    }
    try:
        if parsed_data.get('format') == 'progress_summary':
            student_info = parsed_data.get('student_info', {})
            weighted_gpa = _number(student_info.get('weighted_gpa', 0))
            service_hours = int(_number(student_info.get('community_service_hours', 0)))
        else:
            gpa_info = parsed_data.get('academic_summary', {}).get('gpa', {})
            service_info = parsed_data.get('assessments', {}).get('community_service', {})
            weighted_gpa = _number(gpa_info.get('district', 0))
            service_hours = int(_number(service_info.get('hours', 0)))

        rigor_analysis = self.analyze_course_rigor(parsed_data)
        advanced_count = rigor_analysis['advanced_courses']

        matched_tier = next(
            (
                lists
                for min_gpa, min_advanced, min_hours, lists in admission_tiers
                if weighted_gpa >= min_gpa
                and advanced_count >= min_advanced
                and service_hours >= min_hours
            ),
            lowest_tier,
        )
        for bucket, schools in matched_tier.items():
            recommendations[bucket].extend(schools)

        for min_gpa, scholarships in scholarship_tiers:
            if weighted_gpa >= min_gpa:
                recommendations['scholarships'].extend(scholarships)
                break

        if weighted_gpa < 3.5:
            recommendations['improvement_areas'].append("Improve GPA through focused study and tutoring")
        if advanced_count < 4:
            recommendations['improvement_areas'].append("Take more advanced courses (AP/IB/DE/Honors)")
        if service_hours < 50:
            recommendations['improvement_areas'].append("Increase community service involvement")
        return recommendations
    except Exception as e:
        logger.error(f"College recommendations error: {str(e)}")
        return {
            'reach': ["Could not generate recommendations - insufficient data"],
            'target': [],
            'safety': [],
            'scholarships': [],
            'improvement_areas': [
                "Complete your profile information",
                "Ensure your transcript contains GPA and course information"
            ]
        }
def generate_study_plan(self, parsed_data: Dict, learning_style: str) -> Dict:
    """Build a weekly study plan tailored to a learning style.

    Courses count as current when their ``status`` is "in progress"
    (case-insensitive) or their ``credit_earned`` is the float ``0.0``.
    Current courses are spread round-robin over Monday to Friday; Saturday
    and Sunday are present in the schedule but stay empty.

    Args:
        parsed_data: Parsed transcript dictionary; ``course_history`` is read.
        learning_style: One of "visual", "auditory", "reading/writing" or
            "kinesthetic" (case and surrounding whitespace are ignored).
            Any other value yields a plan without style-specific strategies.

    Returns:
        A dict with ``weekly_schedule``, ``study_strategies``,
        ``time_management_tips`` and ``resource_recommendations``. If anything
        raises, the error is logged and a generic fallback plan is returned.
    """
    strategies_by_style = {
        'visual': [
            "Create colorful mind maps for each subject",
            "Use flashcards with images and diagrams",
            "Watch educational videos on topics",
        ],
        'auditory': [
            "Record yourself explaining concepts and listen back",
            "Participate in study groups",
            "Listen to educational podcasts",
        ],
        'reading/writing': [
            "Write detailed summaries in your own words",
            "Create question-answer sets for each topic",
            "Rewrite your notes to reinforce learning",
        ],
        'kinesthetic': [
            "Create physical models or demonstrations",
            "Study while walking or pacing",
            "Use hands-on activities when possible",
        ],
    }
    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    plan = {
        'weekly_schedule': {},
        'study_strategies': [],
        'time_management_tips': [],
        'resource_recommendations': []
    }
    try:
        current_courses = [
            course for course in parsed_data.get('course_history', [])
            # `or ''` keeps an explicit None status from raising on .lower()
            if (course.get('status') or '').lower() == 'in progress'
            or (isinstance(course.get('credit_earned'), float) and course['credit_earned'] == 0)
        ]
        plan['weekly_schedule'] = {day: [] for day in days}

        # Normalise once; a missing style simply adds no style-specific strategies.
        style_key = (learning_style or '').strip().lower()
        plan['study_strategies'].extend(strategies_by_style.get(style_key, []))

        for position, course in enumerate(current_courses):
            # Modulo 5 keeps the sessions on weekdays only.
            day = days[position % 5]
            course_name = course.get('description') or course.get('course_title', 'Course')
            plan['weekly_schedule'][day].append({
                'course': course_name,
                'duration': '45-60 minutes',
                'activities': [
                    "Review notes",
                    "Complete practice problems",
                    "Prepare questions for teacher"
                ]
            })

        plan['time_management_tips'].extend([
            "Use the Pomodoro technique (25 min study, 5 min break)",
            "Prioritize assignments by due date and importance",
            "Schedule regular review sessions"
        ])
        plan['resource_recommendations'].extend([
            "Khan Academy for math and science",
            "Quizlet for flashcards",
            "Wolfram Alpha for math help"
        ])
        return plan
    except Exception as e:
        logger.error(f"Study plan error: {str(e)}")
        return {
            'weekly_schedule': {'Error': ["Could not generate schedule - course data may be missing"]},
            'study_strategies': [
                "Review your notes regularly",
                "Create a consistent study routine",
                "Ask teachers for clarification when needed"
            ],
            'time_management_tips': [
                "Set aside dedicated study time each day",
                "Break large tasks into smaller chunks",
                "Use a planner to track assignments"
            ],
            'resource_recommendations': [
                "Khan Academy",
                "Quizlet",
                "Your textbook and class materials"
            ]
        }
# Module-level AcademicAnalyzer instance. Other code in this file calls it directly,
# e.g. DataVisualizer.create_course_rigor_visualization and EnhancedProfileManager.save_profile.
academic_analyzer = AcademicAnalyzer() | |
class DataVisualizer:
    """Builds Plotly figures that summarise a parsed transcript.

    Every ``create_*`` method returns a Plotly figure, or ``None`` when there
    is nothing to plot or when building the figure fails (the error is logged).
    """

    def __init__(self):
        # Hex colours shared by the charts below.
        self.color_palette = {
            'complete': '#4CAF50',
            'incomplete': '#F44336',
            'in_progress': '#FFC107',
            'gpa_weighted': '#3F51B5',
            'gpa_unweighted': '#9C27B0',
            'core': '#3498DB',
            'electives': '#2ECC71',
            'arts_pe': '#9B59B6'
        }

    @staticmethod
    def _completion_status(completion: float) -> str:
        """Map a completion percentage to the status label used for colouring."""
        if completion >= 100:
            return "Complete"
        if completion > 0:
            return "In Progress"
        return "Not Started"

    def create_gpa_visualization(self, parsed_data: Dict):
        """Return a bar chart comparing weighted and unweighted GPA.

        For the ``progress_summary`` format the values come from
        ``student_info``; otherwise ``academic_summary.gpa.district`` is used as
        the weighted GPA and ``academic_summary.gpa.state`` as the unweighted one.
        """
        try:
            if parsed_data.get('format') == 'progress_summary':
                student_info = parsed_data.get('student_info', {})
                weighted_gpa = float(student_info.get('weighted_gpa', 0))
                unweighted_gpa = float(student_info.get('unweighted_gpa', 0))
            else:
                gpa_info = parsed_data.get('academic_summary', {}).get('gpa', {})
                weighted_gpa = float(gpa_info.get('district', 0))
                unweighted_gpa = float(gpa_info.get('state', 0))

            df = pd.DataFrame({
                "Type": ["Weighted GPA", "Unweighted GPA"],
                "Value": [weighted_gpa, unweighted_gpa],
            })
            fig = px.bar(
                df,
                x="Type",
                y="Value",
                title="GPA Comparison",
                color="Type",
                color_discrete_map={
                    "Weighted GPA": self.color_palette['gpa_weighted'],
                    "Unweighted GPA": self.color_palette['gpa_unweighted']
                },
                text="Value",
                hover_data={"Type": True, "Value": ":.2f"}
            )
            # Dotted reference lines at the 4.0 / 3.0 / 2.0 marks.
            for level, line_color, label in ((4.0, "green", "Excellent"),
                                             (3.0, "orange", "Good"),
                                             (2.0, "red", "Minimum")):
                fig.add_hline(y=level, line_dash="dot", line_color=line_color,
                              annotation_text=label, annotation_position="top left")
            fig.update_traces(
                texttemplate='%{text:.2f}',
                textposition='outside',
                marker_line_color='rgb(8,48,107)',
                marker_line_width=1.5
            )
            fig.update_layout(
                yaxis_range=[0, 5],
                uniformtext_minsize=8,
                uniformtext_mode='hide',
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(size=12)
            )
            return fig
        except Exception as e:
            logger.error(f"Error creating GPA visualization: {str(e)}")
            return None

    def create_requirements_visualization(self, parsed_data: Dict):
        """Return a bar chart of graduation-requirement completion percentages.

        Requirements at 0 % are kept so they show up as "Not Started"; only
        entries whose completion cannot be determined are left out. Returns
        ``None`` when no requirement can be plotted.
        """
        try:
            req_data = []
            if parsed_data.get('format') == 'progress_summary':
                for code, req in parsed_data.get('requirements', {}).items():
                    percent = req.get('percent_complete') if req else None
                    # Skip only missing values: a numeric 0 is a real
                    # "Not Started" requirement and must stay in the chart.
                    if percent is None or percent == '':
                        continue
                    completion = float(percent)
                    required = req.get('required', 0)
                    completed = req.get('completed', 0)
                    req_data.append({
                        "Requirement": f"{code}: {(req.get('description') or '')[:30]}...",
                        "Completion (%)": completion,
                        "Status": self._completion_status(completion),
                        "Required": required,
                        "Completed": completed,
                        "Remaining": max(0, float(required) - float(completed))
                    })
            else:
                credits = parsed_data.get('academic_summary', {}).get('credits', {})
                for subject, info in credits.items():
                    required = info.get('required')
                    earned = info.get('earned')
                    # A requirement needs a non-zero target; zero earned credits is valid.
                    if not required or earned is None:
                        continue
                    completion = (earned / required) * 100 if required > 0 else 0
                    req_data.append({
                        "Requirement": subject,
                        "Completion (%)": completion,
                        "Status": self._completion_status(completion),
                        "Required": required,
                        "Completed": earned,
                        "Remaining": max(0, required - earned)
                    })
            if not req_data:
                return None

            df = pd.DataFrame(req_data)
            fig = px.bar(
                df,
                x="Requirement",
                y="Completion (%)",
                title="Graduation Requirements Completion",
                color="Status",
                color_discrete_map={
                    "Complete": self.color_palette['complete'],
                    "In Progress": self.color_palette['in_progress'],
                    "Not Started": self.color_palette['incomplete']
                },
                hover_data=["Required", "Completed", "Remaining"],
                text="Completion (%)"
            )
            fig.update_traces(
                texttemplate='%{text:.1f}%',
                textposition='outside',
                marker_line_color='rgb(8,48,107)',
                marker_line_width=1.5
            )
            fig.update_layout(
                xaxis={'categoryorder': 'total descending'},
                yaxis_range=[0, 100],
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(size=12),
                hovermode="x unified"
            )
            fig.add_hline(y=100, line_dash="dot", line_color="green")
            return fig
        except Exception as e:
            logger.error(f"Error creating requirements visualization: {str(e)}")
            return None

    def create_credits_distribution_visualization(self, parsed_data: Dict):
        """Return a donut chart of credits split into core / electives / arts-PE.

        Missing or ``None`` credit values count as zero. Returns ``None`` when
        the total is zero.
        """
        try:
            if parsed_data.get('format') == 'progress_summary':
                # The category is the part of the requirement code before "-".
                # create_requirements_visualization treats the dict key as the
                # code, so fall back to the key when an entry has no 'code'.
                # NOTE(review): confirm against the parser which of the two it fills in.
                entries = [
                    (str(req.get('code') or key).split('-')[0], req.get('completed'))
                    for key, req in parsed_data.get('requirements', {}).items()
                    if req
                ]
                category_groups = (
                    ['A', 'B', 'C', 'D'],
                    ['G', 'H'],
                    ['E', 'F'],
                )
            else:
                credits = parsed_data.get('academic_summary', {}).get('credits', {})
                # The category is the first word of the subject name;
                # `or ['']` guards an empty subject string.
                entries = [
                    ((subject.split() or [''])[0], (info or {}).get('earned'))
                    for subject, info in credits.items()
                ]
                category_groups = (
                    ['ENGLISH', 'ALGEBRA1', 'GEOMETRY', 'MATHEMATICS', 'BIOLOGY', 'SCIENCE'],
                    ['ELECTIVE', 'WORLD'],
                    ['ARTS', 'PHYSICAL', 'PERFORMING'],
                )

            credit_values = [
                sum(float(value or 0) for category, value in entries if category in group)
                for group in category_groups
            ]
            credit_labels = ['Core Subjects', 'Electives', 'Arts/PE']
            if sum(credit_values) == 0:
                return None

            df = pd.DataFrame({
                "Category": credit_labels,
                "Credits": credit_values,
            })
            fig = px.pie(
                df,
                values="Credits",
                names="Category",
                title="Credit Distribution",
                color="Category",
                color_discrete_map={
                    "Core Subjects": self.color_palette['core'],
                    "Electives": self.color_palette['electives'],
                    "Arts/PE": self.color_palette['arts_pe']
                },
                hole=0.3
            )
            fig.update_traces(
                textposition='inside',
                textinfo='percent+label',
                marker=dict(line=dict(color='#FFFFFF', width=2))
            )
            fig.update_layout(
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(size=12),
                showlegend=False
            )
            return fig
        except Exception as e:
            logger.error(f"Error creating credits visualization: {str(e)}")
            return None

    def create_course_rigor_visualization(self, parsed_data: Dict):
        """Return a bar chart counting AP, IB, DE and Honors courses.

        The counts come from the module-level
        ``academic_analyzer.analyze_course_rigor(parsed_data)``.
        """
        try:
            rigor = academic_analyzer.analyze_course_rigor(parsed_data)
            df = pd.DataFrame({
                "Type": ["AP", "IB", "DE", "Honors"],
                "Count": [rigor['ap_courses'], rigor['ib_courses'],
                          rigor['de_courses'], rigor['honors_courses']],
            })
            fig = px.bar(
                df,
                x="Type",
                y="Count",
                title="Advanced Course Breakdown",
                color="Type",
                color_discrete_map={
                    "AP": "#E91E63",
                    "IB": "#673AB7",
                    "DE": "#009688",
                    "Honors": "#FF9800"
                },
                text="Count"
            )
            fig.update_traces(
                textposition='outside',
                marker_line_color='rgb(8,48,107)',
                marker_line_width=1.5
            )
            fig.update_layout(
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(size=12),
                xaxis_title="Course Type",
                yaxis_title="Number of Courses"
            )
            return fig
        except Exception as e:
            logger.error(f"Error creating course rigor visualization: {str(e)}")
            return None
# Module-level DataVisualizer instance (constructing it only builds the colour palette).
data_visualizer = DataVisualizer() | |
class EnhancedProfileManager:
    """Stores student profiles as JSON files under ``PROFILES_DIR``.

    The ``name``, ``interests`` and ``blog`` fields are encrypted with
    ``DataEncryptor`` before being written. File names are built from a
    truncated SHA-256 of the student name, plus the current session token when
    one is set. When ``HF_TOKEN`` and ``hf_api`` are available, profiles are
    also mirrored to a Hugging Face dataset repository.
    """

    def __init__(self):
        self.profiles_dir = Path(PROFILES_DIR)
        self.profiles_dir.mkdir(exist_ok=True, parents=True)
        self.current_session = None
        self.encryptor = DataEncryptor(ENCRYPTION_KEY)

    def set_session(self, session_token: str) -> None:
        """Remember the session token used when building profile file names."""
        self.current_session = session_token

    def get_profile_path(self, name: str) -> Path:
        """Return the profile file path for *name* within the current session."""
        name_hash = hashlib.sha256(name.encode()).hexdigest()[:16]
        if self.current_session:
            return self.profiles_dir / f"{name_hash}_{self.current_session}_profile.json"
        return self.profiles_dir / f"{name_hash}_profile.json"

    def save_profile(self, name: str, age: Union[int, str], interests: str,
                     transcript: Dict, learning_style: str,
                     movie: str, movie_reason: str, show: str, show_reason: str,
                     book: str, book_reason: str, character: str, character_reason: str,
                     blog: str, study_plan: Dict = None) -> str:
        """Validate, encrypt and persist a student profile.

        When no *study_plan* is supplied, one is generated from the learning
        style named in the quiz result text.

        Returns:
            A success message naming the student.

        Raises:
            gr.Error: On any validation or I/O failure (the cause is logged).
        """
        try:
            name = validate_name(name)
            age = validate_age(age)
            if not interests.strip():
                raise ValueError("Please describe at least one interest or hobby.")
            if not transcript:
                raise ValueError("Please complete the transcript analysis first.")
            if not learning_style or "Your primary learning style is" not in learning_style:
                raise ValueError("Please complete the learning style quiz first.")

            favorites = {
                "movie": sanitize_input(movie),
                "movie_reason": sanitize_input(movie_reason),
                "show": sanitize_input(show),
                "show_reason": sanitize_input(show_reason),
                "book": sanitize_input(book),
                "book_reason": sanitize_input(book_reason),
                "character": sanitize_input(character),
                "character_reason": sanitize_input(character_reason)
            }

            if not study_plan:
                learning_style_match = re.search(
                    r"Your primary learning style is\s*\*\*(.*?)\*\*", learning_style)
                if learning_style_match:
                    study_plan = academic_analyzer.generate_study_plan(
                        transcript,
                        learning_style_match.group(1))

            data = {
                "name": self.encryptor.encrypt(name),
                "age": age,
                "interests": self.encryptor.encrypt(sanitize_input(interests)),
                "transcript": transcript,
                "learning_style": learning_style,
                "favorites": favorites,
                "blog": self.encryptor.encrypt(sanitize_input(blog)) if blog else "",
                "study_plan": study_plan if study_plan else {},
                "session_token": self.current_session,
                "last_updated": time.time(),
                "version": "2.1"
            }

            # Write to a sibling temp file and rename it over the target, so a
            # reader never sees a half-written profile.
            filepath = self.get_profile_path(name)
            temp_path = filepath.with_suffix('.tmp')
            with open(temp_path, "w", encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
            temp_path.replace(filepath)

            if HF_TOKEN and hf_api:
                try:
                    hf_api.upload_file(
                        path_or_fileobj=filepath,
                        path_in_repo=f"profiles/{filepath.name}",
                        repo_id="your-username/student-learning-assistant",
                        repo_type="dataset",
                        # The name is encrypted on disk, so keep it out of the
                        # repository history too: identify the profile by its
                        # hashed file stem instead.
                        commit_message=f"Profile update for {filepath.stem}"
                    )
                except Exception as e:
                    # The hub mirror is best effort; the local save already succeeded.
                    logger.error(f"Failed to upload to HF Hub: {str(e)}")
            return f"Profile saved successfully for {name}."
        except Exception as e:
            logger.error(f"Profile save error: {str(e)}")
            raise gr.Error(f"Couldn't save profile: {str(e)}") from e

    def _read_profile(self, name: str = None, session_token: str = None) -> Dict:
        """Locate, read and decrypt one profile (a single attempt, no retries)."""
        if session_token:
            profile_pattern = f"*{session_token}_profile.json"
        else:
            profile_pattern = "*.json"
        profiles = list(self.profiles_dir.glob(profile_pattern))
        # NOTE(review): with no local profiles this returns {} even when a name
        # is given, so the hub fallback below is never reached on an empty
        # directory. Left as is because callers may rely on the {} result.
        if not profiles:
            return {}

        if name:
            profile_file = self.get_profile_path(name)
            if not profile_file.exists():
                if not (HF_TOKEN and hf_api):
                    raise gr.Error(f"No profile found for {name}")
                try:
                    # HfApi has no `download_file`; `hf_hub_download` is the
                    # download call. It keeps the "profiles/" sub-path under
                    # local_dir, so copy the result to the flat location that
                    # get_profile_path expects.
                    downloaded = hf_api.hf_hub_download(
                        repo_id="your-username/student-learning-assistant",
                        filename=f"profiles/{profile_file.name}",
                        repo_type="dataset",
                        local_dir=self.profiles_dir
                    )
                    profile_file.write_bytes(Path(downloaded).read_bytes())
                except Exception as e:
                    logger.warning(f"Failed to download profile: {str(e)}")
                    raise gr.Error(f"No profile found for {name}")
        else:
            # No name given: use the most recently modified profile.
            profiles.sort(key=lambda x: x.stat().st_mtime, reverse=True)
            profile_file = profiles[0]

        with open(profile_file, "r", encoding='utf-8') as f:
            profile_data = json.load(f)

        if time.time() - profile_data.get('last_updated', 0) > SESSION_TIMEOUT:
            raise gr.Error("Session expired. Please start a new session.")

        # Only profile versions 2.0 and 2.1 store encrypted fields.
        if profile_data.get('version', '1.0') in ['2.0', '2.1']:
            try:
                profile_data['name'] = self.encryptor.decrypt(profile_data['name'])
                profile_data['interests'] = self.encryptor.decrypt(profile_data.get('interests', ''))
                if profile_data.get('blog'):
                    profile_data['blog'] = self.encryptor.decrypt(profile_data['blog'])
            except Exception as e:
                logger.error(f"Decryption error: {str(e)}")
                raise gr.Error("Failed to decrypt profile data")
        return profile_data

    def load_profile(self, name: str = None, session_token: str = None) -> Dict:
        """Load a profile by name, or the most recent one when no name is given.

        Unexpected failures (for example a file that cannot be parsed yet) are
        retried up to ``MAX_PROFILE_LOAD_ATTEMPTS`` times with a growing delay.
        User-facing ``gr.Error`` failures are raised immediately, because
        retrying cannot change them.

        Returns:
            The profile dict with encrypted fields decrypted, or ``{}`` when no
            profile files match.

        Raises:
            gr.Error: Profile not found, session expired, decryption failed,
                or the file is still corrupt after the last attempt.
        """
        for attempt in range(MAX_PROFILE_LOAD_ATTEMPTS):
            last_attempt = attempt == MAX_PROFILE_LOAD_ATTEMPTS - 1
            try:
                return self._read_profile(name, session_token)
            except gr.Error:
                raise
            except json.JSONDecodeError:
                if last_attempt:
                    logger.error(f"Failed to load profile after {MAX_PROFILE_LOAD_ATTEMPTS} attempts")
                    raise gr.Error("Corrupted profile data")
            except Exception:
                if last_attempt:
                    raise
            time.sleep(0.5 * (attempt + 1))
        return {}

    def list_profiles(self, session_token: str = None) -> List[str]:
        """Return the student names of all readable profiles.

        Files that cannot be read or parsed are skipped. If a name cannot be
        decrypted, the file stem is listed instead.
        """
        if session_token:
            profiles = list(self.profiles_dir.glob(f"*{session_token}_profile.json"))
        else:
            profiles = list(self.profiles_dir.glob("*.json"))

        profile_names = []
        for p in profiles:
            try:
                with open(p, "r", encoding='utf-8') as f:
                    data = json.load(f)
                if data.get('version', '1.0') in ['2.0', '2.1']:
                    try:
                        profile_names.append(self.encryptor.decrypt(data['name']))
                    except Exception:
                        profile_names.append(p.stem)
                else:
                    profile_names.append(data.get('name', p.stem))
            except Exception as e:
                logger.warning(f"Skipping unreadable profile {p.name}: {str(e)}")
                continue
        return profile_names

    def delete_profile(self, name: str, session_token: str = None) -> bool:
        """Delete the profile for *name*; return whether a file was removed.

        When *session_token* is given, the profile is deleted only if its
        stored ``session_token`` matches.
        """
        try:
            profile_file = self.get_profile_path(name)
            if not profile_file.exists():
                return False
            with open(profile_file, "r", encoding='utf-8') as f:
                data = json.load(f)
            if session_token and data.get('session_token') != session_token:
                return False
            profile_file.unlink()
            if HF_TOKEN and hf_api:
                try:
                    hf_api.delete_file(
                        path_in_repo=f"profiles/{profile_file.name}",
                        repo_id="your-username/student-learning-assistant",
                        repo_type="dataset"
                    )
                except Exception as e:
                    logger.error(f"Failed to delete from HF Hub: {str(e)}")
            return True
        except Exception as e:
            logger.error(f"Error deleting profile: {str(e)}")
            return False
# Module-level EnhancedProfileManager instance. Constructing it creates PROFILES_DIR
# if needed and builds a DataEncryptor from ENCRYPTION_KEY.
profile_manager = EnhancedProfileManager() | |
class EducationalChatbot:
    """Answers educational questions with a causal language model.

    Questions are screened with a keyword list and refused when they do not
    look educational. The prompt is personalised from the student's profile,
    and the model output is post-processed to add guiding language.
    """

    def __init__(self):
        self.model_name = "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
        self.tokenizer = None
        self.model = None
        # Keywords, grouped by subject area, that mark a question as educational.
        self.educational_topics = {
            'math': ['algebra', 'calculus', 'geometry', 'trigonometry'],
            'science': ['biology', 'chemistry', 'physics', 'astronomy'],
            'humanities': ['history', 'literature', 'philosophy'],
            'languages': ['english', 'spanish', 'french', 'grammar'],
            'arts': ['music', 'art', 'drama'],
            'technology': ['programming', 'computer science']
        }
        # The model is loaded eagerly, as part of construction.
        self.load_model()

    def load_model(self):
        """Load the HuggingFace tokenizer and model named by ``self.model_name``."""
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            torch_dtype=torch.float16
        )
        logger.info("Educational chatbot model loaded")

    def is_educational(self, question: str) -> bool:
        """Return True if *question* contains any known topic keyword.

        The check is a case-insensitive substring match, so a keyword also
        matches inside a longer word.
        """
        question_lower = question.lower()
        return any(
            topic in question_lower
            for topics in self.educational_topics.values()
            for topic in topics
        )

    def generate_response(self, question: str, profile: Dict) -> Tuple[str, List[Dict]]:
        """Generate a personalised educational response.

        Returns:
            ``(response_text, multimedia_suggestions)``. Questions that are not
            educational get a fixed refusal message and no suggestions.
        """
        if not self.is_educational(question):
            return (
                "I specialize in educational topics only. Please ask about subjects like math, "
                "science, history, or literature. I can help with concepts, problem-solving methods, "
                "and learning strategies.",
                []
            )

        learning_style = self._get_learning_style(profile)
        prompt = self._build_prompt(question, profile)
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=300,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2,
            do_sample=True
        )
        # For a causal LM the generated sequence starts with the prompt tokens.
        # Decode only what follows them, otherwise the instructions and the
        # question are echoed back to the student as part of the answer.
        prompt_length = inputs["input_ids"].shape[-1]
        raw_response = self.tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

        processed_response = self._make_response_pedagogical(raw_response)
        multimedia = self._get_multimedia_suggestions(processed_response, learning_style)
        return processed_response, multimedia

    def _get_learning_style(self, profile: Dict) -> str:
        """Extract the lower-cased learning style from the profile's quiz result.

        Returns ``'balanced'`` when the profile has no quiz result or the
        result text does not match the expected pattern.
        """
        if not profile or 'learning_style' not in profile:
            return 'balanced'
        style_match = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*",
                                profile['learning_style'])
        return style_match.group(1).lower() if style_match else 'balanced'

    def _build_prompt(self, question: str, profile: Dict) -> str:
        """Build the model prompt, personalised with courses and learning style."""
        base_prompt = (
            "You are an expert teaching assistant helping a student. Your role is to guide them "
            "to discover answers themselves, not provide direct solutions. Use the Socratic method "
            "by asking guiding questions and explaining concepts step-by-step.\n\n"
        )
        if profile:
            if 'transcript' in profile:
                history = (profile['transcript'] or {}).get('course_history', [])
                # Some course entries carry 'description' instead of
                # 'course_title' (generate_study_plan reads both); entries with
                # neither are skipped rather than raising KeyError.
                titles = [c.get('course_title') or c.get('description') for c in history]
                courses = [title for title in titles if title]
                if courses:
                    base_prompt += (
                        f"The student has taken these courses: {', '.join(courses[:5])}. "
                        "Consider their academic background when responding.\n\n"
                    )
            learning_style = self._get_learning_style(profile)
            if learning_style != 'balanced':
                base_prompt += (
                    f"The student is a {learning_style} learner. Adapt your teaching approach accordingly.\n\n"
                )
        base_prompt += (
            f"Student Question: {question}\n\n"
            "Teaching Assistant Response:\n"
            "1. First, let's understand the key concepts involved...\n"
            "2. What do you think would be the first step in solving this?\n"
            "3. Consider this approach...\n"
            "4. Here's how we might break this down...\n"
            "Remember, the goal is understanding, not just the answer."
        )
        return base_prompt

    def _make_response_pedagogical(self, response: str) -> str:
        """Strip direct answers and make sure the text contains guiding phrases."""
        # Drops the rest of the line after "the answer is", "it is" or
        # "direct solution:" (case-insensitive).
        response = re.sub(r"(the answer is|it is|direct solution:) .*?(\n|$)", "", response, flags=re.I)
        guiding_phrases = [
            "What do you think about...",
            "Have you considered...",
            "Let's break this down...",
            "One approach might be...",
            "Think about how you would...",
            "What steps would you take to..."
        ]
        # Append the first two phrases when fewer than two are already present.
        response_lower = response.lower()
        present = sum(1 for phrase in guiding_phrases if phrase.lower() in response_lower)
        if present < 2:
            response += "\n\n" + "\n".join(guiding_phrases[:2])
        return response

    def _get_multimedia_suggestions(self, response: str, learning_style: str) -> List[Dict]:
        """Suggest external resources based on learning style and response text."""
        resources = {
            'visual': [
                {"type": "video", "source": "Khan Academy", "url": "https://www.khanacademy.org"},
                {"type": "diagram", "source": "Math is Fun", "url": "https://www.mathsisfun.com"},
                {"type": "infographic", "source": "InfoGram", "url": "https://infogram.com"}
            ],
            'auditory': [
                {"type": "podcast", "source": "Stuff You Should Know", "url": "https://www.iheart.com/podcast/stuff-you-should-know-26940277"},
                {"type": "audio_lecture", "source": "The Great Courses", "url": "https://www.thegreatcourses.com"}
            ],
            'reading/writing': [
                {"type": "article", "source": "Britannica", "url": "https://www.britannica.com"},
                {"type": "textbook", "source": "OpenStax", "url": "https://openstax.org"}
            ],
            'kinesthetic': [
                {"type": "interactive", "source": "PhET Simulations", "url": "https://phet.colorado.edu"},
                {"type": "hands-on", "source": "Science Buddies", "url": "https://www.sciencebuddies.org"}
            ]
        }
        # At most two style-specific resources, then one content-specific one.
        suggestions = list(resources.get(learning_style, [])[:2])
        response_lower = response.lower()
        if "math" in response_lower:
            suggestions.append({
                "type": "practice_problems",
                "source": "Art of Problem Solving",
                "url": "https://artofproblemsolving.com"
            })
        elif "science" in response_lower:
            suggestions.append({
                "type": "experiment",
                "source": "Science Journal",
                "url": "https://sciencejournal.withgoogle.com"
            })
        return suggestions
# Module-level EducationalChatbot instance.
# NOTE: the constructor calls load_model(), so the tokenizer and model are loaded
# right here, when this statement runs.
educational_chatbot = EducationalChatbot() | |
class StudyCalendar:
    """Generates a 30-day study calendar and a timeline chart for it."""

    def __init__(self):
        # Default (start, end) study blocks per weekday, as "HH:MM" strings.
        self.default_study_blocks = {
            'Monday': [('16:00', '17:30'), ('19:00', '20:30')],
            'Tuesday': [('16:00', '17:30')],
            'Wednesday': [('16:00', '17:30'), ('19:00', '20:30')],
            'Thursday': [('16:00', '17:30')],
            'Friday': [('15:00', '16:30')],
            'Saturday': [('10:00', '12:00')],
            'Sunday': [('14:00', '16:00')]
        }

    @staticmethod
    def _end_date_for(start_date: str) -> str:
        """Return the date 30 days after *start_date*, or *start_date* itself if unparsable."""
        try:
            start = datetime.datetime.strptime(start_date, '%Y-%m-%d')
        except (TypeError, ValueError):
            return start_date
        return (start + datetime.timedelta(days=30)).strftime('%Y-%m-%d')

    def generate_study_calendar(self, profile: Dict, start_date: str) -> Dict:
        """Generate a study calendar based on the student's profile.

        Study sessions are created from ``self.default_study_blocks`` for every
        day from *start_date* through 30 days later (inclusive). Each course
        whose title contains "exam" gets an exam entry on a random date 7 to 28
        days after the start.

        Args:
            profile: Student profile; ``transcript.course_history`` is read.
            start_date: First calendar day as ``YYYY-MM-DD``.

        Returns:
            A dict with ``start_date``, ``end_date``, ``events`` and ``exams``.
            On failure the error is logged and a calendar without events is
            returned; if *start_date* itself is invalid, ``end_date`` echoes it.
        """
        try:
            first_day = datetime.datetime.strptime(start_date, '%Y-%m-%d')
            last_day = first_day + datetime.timedelta(days=30)
            # Named `schedule` so the imported `calendar` module is not shadowed.
            schedule = {
                'start_date': start_date,
                'end_date': last_day.strftime('%Y-%m-%d'),
                'events': [],
                'exams': []
            }

            # Regular study sessions.
            current_date = first_day
            while current_date <= last_day:
                day_label = current_date.strftime('%Y-%m-%d')
                for block_start, block_end in self.default_study_blocks.get(current_date.strftime('%A'), []):
                    schedule['events'].append({
                        'date': day_label,
                        'title': 'Study Session',
                        'description': 'Focused study time',
                        'start_time': block_start,
                        'end_time': block_end,
                        'duration': f"{block_start} to {block_end}"
                    })
                current_date += datetime.timedelta(days=1)

            # Exams from the transcript, if available. `or {}` keeps a missing
            # profile or transcript from discarding the sessions built above.
            transcript = (profile or {}).get('transcript') or {}
            for course in transcript.get('course_history') or []:
                if 'exam' in (course.get('course_title') or '').lower():
                    exam_day = first_day + datetime.timedelta(days=random.randint(7, 28))
                    schedule['exams'].append({
                        'date': exam_day.strftime('%Y-%m-%d'),
                        'title': course.get('course_title', 'Exam'),
                        'description': 'Prepare by reviewing materials',
                        'duration': 'All day'
                    })
            return schedule
        except Exception as e:
            logger.error(f"Error generating study calendar: {str(e)}")
            # The failure may be an unparsable start_date, so the end date must
            # be computed defensively: re-parsing here would raise again.
            return {
                'start_date': start_date,
                'end_date': self._end_date_for(start_date),
                'events': [],
                'exams': []
            }

    def create_calendar_visualization(self, calendar: Dict):
        """Create a timeline figure of the study calendar.

        Returns ``None`` when the calendar has neither events nor exams, or
        when building the figure fails (the error is logged).
        """
        try:
            if not calendar.get('events') and not calendar.get('exams'):
                return None
            events_df = pd.DataFrame(calendar['events'])
            exams_df = pd.DataFrame(calendar['exams'])
            fig = px.timeline(
                events_df,
                x_start="start_time",
                x_end="end_time",
                y="date",
                color_discrete_sequence=['#4CAF50'],
                title="Study Schedule"
            )
            if not exams_df.empty:
                # Exams are drawn as all-day bars on their date.
                exam_count = len(exams_df)
                fig.add_trace(px.timeline(
                    exams_df,
                    x_start=[datetime.time(0, 0).strftime('%H:%M')] * exam_count,
                    x_end=[datetime.time(23, 59).strftime('%H:%M')] * exam_count,
                    y="date",
                    color_discrete_sequence=['#F44336']
                ).data[0])
            fig.update_layout(
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(size=12),
                showlegend=False
            )
            return fig
        except Exception as e:
            logger.error(f"Error creating calendar visualization: {str(e)}")
            return None
# Module-level StudyCalendar instance (constructing it only sets the default weekly study blocks).
study_calendar = StudyCalendar() | |
class GoalTracker:
    """Persist per-student goals and their progress in ``student_goals.json``.

    The file holds one JSON object mapping a student name to a list of goal
    dicts. Each goal has ``id``, ``type``, ``description``, ``target_date``,
    ``target_value``, ``created_at`` and a ``progress`` list of
    ``{'date', 'value', 'notes'}`` entries.
    """

    def __init__(self):
        self.goals_file = Path("student_goals.json")
        # Create the file up front; an empty file is read back as "no goals".
        self.goals_file.touch(exist_ok=True)

    def add_goal(self, student_name: str, goal_type: str, description: str,
                 target_date: str, target_value: Optional[float] = None) -> bool:
        """Add a new goal for the student.

        Returns:
            True when the goal was stored, False on an invalid date or any
            other failure (the error is logged).
        """
        try:
            if not validate_date(target_date):
                raise ValueError("Invalid target date format. Please use YYYY-MM-DD")
            goals = self._load_goals()
            student_goals = goals.get(student_name, [])
            student_goals.append({
                'id': str(len(student_goals) + 1),
                'type': goal_type,
                'description': description,
                'target_date': target_date,
                'target_value': target_value,
                'created_at': datetime.datetime.now().isoformat(),
                'progress': []
            })
            goals[student_name] = student_goals
            self._save_goals(goals)
            return True
        except Exception as e:
            logger.error(f"Error adding goal: {str(e)}")
            return False

    def update_goal_progress(self, student_name: str, goal_id: str,
                             progress_value: float, notes: str = "") -> bool:
        """Append a progress entry to one of the student's goals.

        Returns:
            True only when a goal with ``goal_id`` exists for the student and
            the entry was saved; False when the student or goal is unknown or
            the update failed.
        """
        try:
            goals = self._load_goals()
            if student_name not in goals:
                return False
            for goal in goals[student_name]:
                # Ids are stored as strings; compare as strings so a numeric
                # id coming from a caller still matches.
                if str(goal.get('id')) == str(goal_id):
                    goal.setdefault('progress', []).append({
                        'date': datetime.datetime.now().isoformat(),
                        'value': progress_value,
                        'notes': notes
                    })
                    break
            else:
                # No goal matched: report failure instead of claiming success.
                return False
            self._save_goals(goals)
            return True
        except Exception as e:
            logger.error(f"Error updating goal progress: {str(e)}")
            return False

    def get_goals(self, student_name: str) -> List[Dict]:
        """Return all goals stored for the student (empty list if none)."""
        try:
            return self._load_goals().get(student_name, [])
        except Exception as e:
            logger.error(f"Error getting goals: {str(e)}")
            return []

    def create_goal_visualization(self, goals: List[Dict]):
        """Build a grouped bar chart of latest progress versus target.

        Only goals with at least one progress entry are plotted.

        Returns:
            A Plotly figure, or ``None`` when there is nothing to plot or the
            chart could not be built (the error is logged).
        """
        try:
            if not goals:
                return None
            progress_data = []
            for goal in goals:
                entries = goal.get('progress')
                if not entries:
                    continue
                target = goal.get('target_value')
                if target is None:
                    # The key is always stored (possibly as None), so a plain
                    # .get default would never apply.
                    target = 100
                progress_data.append({
                    'Goal': goal['description'],
                    'Progress': entries[-1]['value'],
                    'Target': target,
                    'Type': goal['type']
                })
            if not progress_data:
                return None
            fig = px.bar(
                pd.DataFrame(progress_data),
                x='Goal',
                y=['Progress', 'Target'],
                barmode='group',
                title="Goal Progress",
                color_discrete_map={
                    'Progress': '#4CAF50',
                    'Target': '#2196F3'
                }
            )
            fig.update_layout(
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(size=12)
            )
            return fig
        except Exception as e:
            logger.error(f"Error creating goal visualization: {str(e)}")
            return None

    def _load_goals(self) -> Dict:
        """Load the goals mapping; empty, missing or malformed files yield {}."""
        try:
            with open(self.goals_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (json.JSONDecodeError, FileNotFoundError):
            return {}
        # Anything but a JSON object cannot be a name -> goals mapping.
        return data if isinstance(data, dict) else {}

    def _save_goals(self, goals: Dict) -> None:
        """Write the complete goals mapping back to the goals file."""
        with open(self.goals_file, 'w', encoding='utf-8') as f:
            json.dump(goals, f, indent=2)
# Module-level GoalTracker instance used by the Goals tab handlers.
# Constructing it creates student_goals.json if the file does not exist yet.
goal_tracker = GoalTracker() | |
def create_enhanced_interface():
    """Build the Gradio Blocks app for the six-step student workflow.

    Steps/tabs: transcript upload, learning-style quiz, personal profile,
    profile review/save, AI teaching assistant, goals & study calendar.
    A ``tab_completed`` state maps tab index -> bool and gates navigation:
    a step button only switches tabs when every earlier step is complete.

    Handlers that finish a step return the step-button updates together with
    their own outputs, so a failed handler never marks a step as completed.

    Returns:
        The constructed ``gr.Blocks`` application (not launched).
    """

    def mark_completed(status, step_index):
        """Return a copy of the completion map with ``step_index`` set done."""
        updated = dict(status or {})
        updated[step_index] = True
        return updated

    def plot_update(figure):
        """Show a plot with its figure, or hide it when no figure exists."""
        return gr.update(value=figure, visible=figure is not None)

    with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
        # NOTE(review): the token is generated once while the UI is built, so
        # every visitor of this process shares it - confirm this is intended.
        session_token = gr.State(value=generate_session_token())
        profile_manager.set_session(session_token.value)
        tab_completed = gr.State({
            0: False,
            1: False,
            2: False,
            3: False,
            4: False,
            5: False
        })
        app.css = """
        .gradio-container {
            max-width: 1200px !important;
            margin: 0 auto !important;
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
        }
        .tab-content {
            padding: 20px !important;
            border: 1px solid #e0e0e0 !important;
            border-radius: 8px !important;
            margin-top: 10px !important;
            background-color: white;
            box-shadow: 0 2px 4px rgba(0,0,0,0.05);
        }
        .completed-tab {
            background: #4CAF50 !important;
            color: white !important;
            font-weight: bold;
        }
        .incomplete-tab {
            background: #E0E0E0 !important;
            color: #616161;
        }
        .nav-message {
            padding: 12px;
            margin: 10px 0;
            border-radius: 6px;
            background-color: #ffebee;
            color: #c62828;
            border-left: 4px solid #c62828;
        }
        .file-upload {
            border: 2px dashed #4CAF50 !important;
            padding: 25px !important;
            border-radius: 8px !important;
            text-align: center;
            background-color: #f8f8f8;
        }
        .file-upload:hover {
            background: #f1f8e9;
        }
        .progress-bar {
            height: 6px;
            background: linear-gradient(to right, #4CAF50, #8BC34A);
            margin-bottom: 15px;
            border-radius: 3px;
            box-shadow: inset 0 1px 2px rgba(0,0,0,0.1);
        }
        .quiz-question {
            margin-bottom: 15px;
            padding: 15px;
            background: #f5f5f5;
            border-radius: 5px;
            border-left: 4px solid #2196F3;
        }
        .quiz-results {
            margin-top: 20px;
            padding: 20px;
            background: #e8f5e9;
            border-radius: 8px;
            border-left: 4px solid #4CAF50;
        }
        .error-message {
            color: #d32f2f;
            background-color: #ffebee;
            padding: 12px;
            border-radius: 6px;
            margin: 10px 0;
            border-left: 4px solid #d32f2f;
        }
        .transcript-results {
            border-left: 4px solid #4CAF50 !important;
            padding: 15px !important;
            background: #f8f8f8 !important;
            border-radius: 4px;
        }
        .error-box {
            border: 1px solid #ff4444 !important;
            background: #fff8f8 !important;
            border-radius: 4px;
        }
        .metric-box {
            background-color: white;
            border-radius: 10px;
            padding: 15px;
            margin: 10px 0;
            box-shadow: 0 2px 5px rgba(0,0,0,0.1);
            border-left: 4px solid #2196F3;
        }
        .recommendation {
            background-color: #fff8e1;
            padding: 10px;
            border-left: 4px solid #ffc107;
            margin: 5px 0;
            border-radius: 4px;
        }
        .goal-card {
            background-color: white;
            border-radius: 8px;
            padding: 15px;
            margin: 10px 0;
            box-shadow: 0 1px 3px rgba(0,0,0,0.1);
            border-left: 4px solid #4CAF50;
        }
        .calendar-event {
            background-color: #e3f2fd;
            border-radius: 6px;
            padding: 10px;
            margin: 5px 0;
            border-left: 4px solid #2196F3;
        }
        .dark .tab-content {
            background-color: #2d2d2d !important;
            border-color: #444 !important;
            color: #eee !important;
        }
        .dark .quiz-question {
            background-color: #3d3d3d !important;
            color: #eee !important;
        }
        .dark .quiz-results {
            background-color: #2e3d2e !important;
            color: #eee !important;
        }
        .dark textarea, .dark input {
            background-color: #333 !important;
            color: #eee !important;
            border-color: #555 !important;
        }
        .dark .output-markdown {
            color: #eee !important;
        }
        .dark .chatbot {
            background-color: #333 !important;
        }
        .dark .chatbot .user, .dark .chatbot .assistant {
            color: #eee !important;
        }
        .dark .metric-box {
            background-color: #333 !important;
            color: #eee !important;
        }
        .dark .goal-card {
            background-color: #333;
            color: #eee;
        }
        .dark .calendar-event {
            background-color: #1a3d5c;
            color: #eee;
        }
        """

        # ---- Header and step navigation buttons ---------------------------
        with gr.Row():
            with gr.Column(scale=4):
                gr.Markdown("""
                # π Student Learning Assistant
                **Your personalized education companion**
                Complete each step to get customized learning recommendations and academic planning.
                """)
            with gr.Column(scale=1):
                dark_mode = gr.Checkbox(label="Dark Mode", value=False)

        with gr.Row():
            with gr.Column(scale=1, min_width=100):
                step1 = gr.Button("π 1. Transcript", elem_classes="incomplete-tab")
            with gr.Column(scale=1, min_width=100):
                step2 = gr.Button("π 2. Quiz", elem_classes="incomplete-tab", interactive=False)
            with gr.Column(scale=1, min_width=100):
                step3 = gr.Button("π€ 3. Profile", elem_classes="incomplete-tab", interactive=False)
            with gr.Column(scale=1, min_width=100):
                step4 = gr.Button("π 4. Review", elem_classes="incomplete-tab", interactive=False)
            with gr.Column(scale=1, min_width=100):
                step5 = gr.Button("π¬ 5. Assistant", elem_classes="incomplete-tab", interactive=False)
            with gr.Column(scale=1, min_width=100):
                step6 = gr.Button("π― 6. Goals", elem_classes="incomplete-tab", interactive=False)

        nav_message = gr.HTML(visible=False)

        with gr.Tabs(visible=True) as tabs:
            # ---- Tab 0: transcript upload and analysis --------------------
            with gr.Tab("Transcript", id=0):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### π Step 1: Upload Your Transcript")
                        with gr.Group(elem_classes="file-upload"):
                            file_input = gr.File(
                                label="Drag and drop your transcript here (PDF or Image)",
                                file_types=ALLOWED_FILE_TYPES,
                                type="filepath"
                            )
                        upload_btn = gr.Button("Analyze Transcript", variant="primary")
                        file_error = gr.HTML(visible=False)
                    with gr.Column(scale=2):
                        transcript_output = gr.Textbox(
                            label="Analysis Results",
                            lines=10,
                            interactive=False,
                            elem_classes="transcript-results"
                        )
                        with gr.Row():
                            gpa_viz = gr.Plot(label="GPA Visualization", visible=False)
                            req_viz = gr.Plot(label="Requirements Visualization", visible=False)
                        with gr.Row():
                            credits_viz = gr.Plot(label="Credits Distribution", visible=False)
                            rigor_viz = gr.Plot(label="Course Rigor", visible=False)
                        transcript_data = gr.State()

                file_input.change(
                    fn=lambda f: (
                        gr.update(visible=False),
                        gr.update(value="File ready for analysis!", visible=True) if f
                        else gr.update(value="Please upload a file", visible=False)
                    ),
                    inputs=file_input,
                    outputs=[file_error, transcript_output]
                )

                def process_and_visualize(file_obj, tab_status):
                    """Parse the uploaded transcript and fill the analysis outputs."""
                    if not file_obj:
                        raise gr.Error("Please upload a transcript file before running the analysis.")
                    # type="filepath" delivers a plain path string; older
                    # file wrappers expose the path as `.name`.
                    file_path = file_obj if isinstance(file_obj, str) else file_obj.name
                    try:
                        parsed_data = transcript_parser.parse_transcript(file_path)
                        gpa_analysis = academic_analyzer.analyze_gpa(parsed_data)
                        grad_status = academic_analyzer.analyze_graduation_status(parsed_data)
                        college_recs = academic_analyzer.generate_college_recommendations(parsed_data)
                        results = [
                            "## π GPA Analysis",
                            f"**Rating:** {gpa_analysis['rating']}",
                            f"{gpa_analysis['description']}",
                            f"{gpa_analysis['comparison']}",
                            "",
                            "## π Graduation Status",
                            grad_status['status'],
                            f"**Completion:** {grad_status['completion_percentage']:.1f}%",
                            "",
                            "## π« College Recommendations"
                        ]
                        if college_recs['reach']:
                            results.append("\n**Reach Schools:**")
                            results.extend(f"- {school}" for school in college_recs['reach'][:3])
                        if college_recs['target']:
                            results.append("\n**Target Schools:**")
                            results.extend(f"- {school}" for school in college_recs['target'][:3])
                        if gpa_analysis.get('improvement_tips'):
                            results.append("\n**Improvement Tips:**")
                            results.extend(f"- {tip}" for tip in gpa_analysis['improvement_tips'])
                        # Build each figure once and hand it to its plot; the
                        # plots used to be revealed without any figure.
                        viz_updates = [
                            plot_update(data_visualizer.create_gpa_visualization(parsed_data)),
                            plot_update(data_visualizer.create_requirements_visualization(parsed_data)),
                            plot_update(data_visualizer.create_credits_distribution_visualization(parsed_data)),
                            plot_update(data_visualizer.create_course_rigor_visualization(parsed_data))
                        ]
                        return (
                            "\n".join(results),
                            parsed_data,
                            *viz_updates,
                            mark_completed(tab_status, 0),
                            gr.update(elem_classes="completed-tab"),
                            gr.update(interactive=True)
                        )
                    except Exception as e:
                        error_msg = f"Error processing transcript: {str(e)}"
                        logger.error(error_msg)
                        raise gr.Error(f"{error_msg}\n\nPossible solutions:\n1. Try a different file format\n2. Ensure text is clear and not handwritten\n3. Check file size (<{MAX_FILE_SIZE_MB}MB)") from e

                # Step-button updates are outputs of the handler itself, so a
                # failed analysis leaves step 1 incomplete and step 2 locked.
                upload_btn.click(
                    fn=process_and_visualize,
                    inputs=[file_input, tab_completed],
                    outputs=[
                        transcript_output, transcript_data,
                        gpa_viz, req_viz, credits_viz, rigor_viz,
                        tab_completed, step1, step2
                    ]
                )

            # ---- Tab 1: learning style quiz -------------------------------
            with gr.Tab("Learning Style Quiz", id=1):
                with gr.Column():
                    gr.Markdown("### π Step 2: Discover Your Learning Style")
                    progress = gr.HTML("<div class='progress-bar' style='width: 0%'></div>")
                    quiz_components = []
                    with gr.Accordion("Quiz Questions", open=True):
                        for i, (question, options) in enumerate(zip(learning_style_quiz.questions, learning_style_quiz.options)):
                            with gr.Group(elem_classes="quiz-question"):
                                q = gr.Radio(
                                    options,
                                    label=f"{i+1}. {question}",
                                    show_label=True
                                )
                                quiz_components.append(q)
                    with gr.Row():
                        quiz_submit = gr.Button("Submit Quiz", variant="primary")
                        quiz_clear = gr.Button("Clear Answers")
                    quiz_alert = gr.HTML(visible=False)
                    learning_output = gr.Markdown(
                        label="Your Learning Style Results",
                        visible=False,
                        elem_classes="quiz-results"
                    )

                def update_quiz_progress(*answers):
                    """Resize the progress bar to the share of answered questions."""
                    return gr.HTML(
                        f"<div class='progress-bar' style='width: {sum(1 for a in answers if a)/len(answers)*100}%'></div>"
                    )

                for component in quiz_components:
                    component.change(
                        fn=update_quiz_progress,
                        inputs=quiz_components,
                        outputs=progress
                    )

                def submit_quiz(tab_status, *answers):
                    """Evaluate the quiz, show the result and unlock step 3."""
                    result = learning_style_quiz.evaluate_quiz(*answers)
                    return (
                        gr.update(value=result, visible=True),
                        mark_completed(tab_status, 1),
                        gr.update(elem_classes="completed-tab"),
                        gr.update(interactive=True)
                    )

                quiz_submit.click(
                    fn=submit_quiz,
                    inputs=[tab_completed, *quiz_components],
                    outputs=[learning_output, tab_completed, step2, step3]
                )
                quiz_clear.click(
                    fn=lambda: [None] * len(quiz_components),
                    outputs=quiz_components
                ).then(
                    fn=lambda: gr.HTML("<div class='progress-bar' style='width: 0%'></div>"),
                    outputs=progress
                )

            # ---- Tab 2: personal profile form -----------------------------
            with gr.Tab("Personal Profile", id=2):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### π€ Step 3: Tell Us About Yourself")
                        with gr.Group():
                            name = gr.Textbox(label="Full Name", placeholder="Your name")
                            age = gr.Number(label="Age", minimum=MIN_AGE, maximum=MAX_AGE, precision=0)
                            interests = gr.Textbox(
                                label="Your Interests/Hobbies",
                                placeholder="e.g., Science, Music, Sports, Art..."
                            )
                        save_personal_btn = gr.Button("Save Information", variant="primary")
                        save_confirmation = gr.HTML(visible=False)
                    with gr.Column(scale=1):
                        gr.Markdown("### β€οΈ Favorites")
                        with gr.Group():
                            movie = gr.Textbox(label="Favorite Movie")
                            movie_reason = gr.Textbox(label="Why do you like it?", lines=2)
                            show = gr.Textbox(label="Favorite TV Show")
                            show_reason = gr.Textbox(label="Why do you like it?", lines=2)
                            book = gr.Textbox(label="Favorite Book")
                            book_reason = gr.Textbox(label="Why do you like it?", lines=2)
                            character = gr.Textbox(label="Favorite Character (from any story)")
                            character_reason = gr.Textbox(label="Why do you like them?", lines=2)
                        with gr.Accordion("Personal Blog (Optional)", open=False):
                            blog = gr.Textbox(
                                label="Share your thoughts",
                                placeholder="Write something about yourself...",
                                lines=5
                            )

                def save_personal_info(_name, _age, _interests, tab_status):
                    """Flag step 3 as done and unlock step 4 (fields are saved later)."""
                    return (
                        mark_completed(tab_status, 2),
                        gr.update(elem_classes="completed-tab"),
                        gr.update(interactive=True),
                        gr.update(value="<div class='alert-box'>Information saved!</div>", visible=True)
                    )

                save_personal_btn.click(
                    fn=save_personal_info,
                    inputs=[name, age, interests, tab_completed],
                    outputs=[tab_completed, step3, step4, save_confirmation]
                )

            # ---- Tab 3: review, save and load profiles --------------------
            with gr.Tab("Save Profile", id=3):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### π Step 4: Review & Save Your Profile")
                        with gr.Group():
                            load_profile_dropdown = gr.Dropdown(
                                label="Load Existing Profile",
                                choices=profile_manager.list_profiles(session_token.value),
                                visible=False
                            )
                            with gr.Row():
                                load_btn = gr.Button("Load", visible=False)
                                delete_btn = gr.Button("Delete", variant="stop", visible=False)
                        save_btn = gr.Button("Save Profile", variant="primary")
                        clear_btn = gr.Button("Clear Form")
                    with gr.Column(scale=2):
                        output_summary = gr.Markdown(
                            "Your profile summary will appear here after saving.",
                            label="Profile Summary"
                        )
                        with gr.Row():
                            req_viz_matplotlib = gr.Plot(label="Requirements Progress", visible=False)
                            credits_viz_matplotlib = gr.Plot(label="Credits Distribution", visible=False)

                # Order matches the positional arguments of save_profile.
                profile_inputs = [
                    name, age, interests, transcript_data, learning_output,
                    movie, movie_reason, show, show_reason,
                    book, book_reason, character, character_reason, blog
                ]

                def save_and_refresh(tab_status, *profile_fields):
                    """Save the profile, then refresh plots, steps and the profile list."""
                    summary = profile_manager.save_profile(*profile_fields)
                    transcript = profile_fields[profile_inputs.index(transcript_data)]
                    profiles = profile_manager.list_profiles(session_token.value)
                    has_profiles = bool(profiles)
                    return (
                        summary,
                        plot_update(data_visualizer.create_requirements_visualization(transcript)),
                        plot_update(data_visualizer.create_credits_distribution_visualization(transcript)),
                        mark_completed(tab_status, 3),
                        gr.update(elem_classes="completed-tab"),
                        gr.update(interactive=True),
                        gr.update(interactive=True),
                        # Refresh the choices (not the value) and reveal the
                        # load controls once at least one profile exists.
                        gr.update(choices=profiles, visible=has_profiles),
                        gr.update(visible=has_profiles),
                        gr.update(visible=has_profiles)
                    )

                save_btn.click(
                    fn=save_and_refresh,
                    inputs=[tab_completed, *profile_inputs],
                    outputs=[
                        output_summary, req_viz_matplotlib, credits_viz_matplotlib,
                        tab_completed, step4, step5, step6,
                        load_profile_dropdown, load_btn, delete_btn
                    ]
                )

                def load_selected_profile(profile_name):
                    """Load the profile chosen in the dropdown into the form."""
                    if not profile_name:
                        raise gr.Error("Please select a profile to load")
                    profile = profile_manager.load_profile(profile_name, session_token.value)
                    if not profile:
                        raise gr.Error("The selected profile could not be loaded")
                    favorites = profile.get('favorites', {})
                    transcript = profile.get('transcript', {})
                    return (
                        profile.get('name', ''),
                        profile.get('age'),
                        profile.get('interests', ''),
                        profile.get('learning_style', ''),
                        favorites.get('movie', ''),
                        favorites.get('movie_reason', ''),
                        favorites.get('show', ''),
                        favorites.get('show_reason', ''),
                        favorites.get('book', ''),
                        favorites.get('book_reason', ''),
                        favorites.get('character', ''),
                        favorites.get('character_reason', ''),
                        profile.get('blog', ''),
                        transcript,
                        gr.update(value="Profile loaded successfully!"),
                        plot_update(data_visualizer.create_requirements_visualization(transcript)),
                        plot_update(data_visualizer.create_credits_distribution_visualization(transcript))
                    )

                # The dropdown is an input so the handler sees the current
                # selection rather than the component's build-time value.
                load_btn.click(
                    fn=load_selected_profile,
                    inputs=load_profile_dropdown,
                    outputs=[
                        name, age, interests, learning_output,
                        movie, movie_reason, show, show_reason,
                        book, book_reason, character, character_reason,
                        blog, transcript_data, output_summary,
                        req_viz_matplotlib, credits_viz_matplotlib
                    ]
                )

            # ---- Tab 4: AI teaching assistant -----------------------------
            with gr.Tab("AI Teaching Assistant", id=4):
                gr.Markdown("## π¬ Your Personalized Teaching Assistant")
                gr.Markdown("Ask educational questions about any subject. I'll guide you to discover the answers yourself.")
                chatbot = gr.Chatbot(height=500)
                msg = gr.Textbox(label="Your Educational Question")
                clear = gr.Button("Clear Chat")

                def respond(message: str, chat_history: List, profile: Dict) -> Tuple[str, List]:
                    """Handle chat responses with multimedia"""
                    chat_history = chat_history or []
                    response, multimedia = educational_chatbot.generate_response(message, profile)
                    # Format multimedia suggestions
                    if multimedia:
                        response += "\n\n**Suggested Resources:**\n"
                        for item in multimedia:
                            response += f"- [{item['type'].title()}] {item['source']}: {item['url']}\n"
                    chat_history.append((message, response))
                    return "", chat_history

                def respond_for_student(message, chat_history, student_name):
                    """Look up the student's saved profile at request time, then answer."""
                    # NOTE(review): assumes load_profile(name, session_token)
                    # as used by the other call sites - confirm the signature.
                    try:
                        profile = profile_manager.load_profile(student_name, session_token.value) or {}
                    except Exception as e:
                        logger.error(f"Error loading profile for chat: {str(e)}")
                        profile = {}
                    return respond(message, chat_history, profile)

                msg.submit(
                    respond_for_student,
                    inputs=[msg, chatbot, name],
                    outputs=[msg, chatbot]
                )
                clear.click(lambda: None, None, chatbot, queue=False)

            # ---- Tab 5: goals and study calendar --------------------------
            with gr.Tab("Goals & Planning", id=5):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### π― Step 5: Set Academic Goals")
                        with gr.Group():
                            goal_type = gr.Dropdown(
                                label="Goal Type",
                                choices=["GPA Improvement", "Course Completion", "Test Score", "Other"],
                                value="GPA Improvement"
                            )
                            goal_description = gr.Textbox(label="Goal Description")
                            goal_target_date = gr.Textbox(label="Target Date (YYYY-MM-DD)", placeholder="2025-12-31")
                            # Visible from the start: the default goal type
                            # ("GPA Improvement") takes a target value.
                            goal_target_value = gr.Number(label="Target Value (if applicable)", visible=True)
                            add_goal_btn = gr.Button("Add Goal", variant="primary")
                        gr.Markdown("### π Study Calendar")
                        calendar_start_date = gr.Textbox(label="Calendar Start Date (YYYY-MM-DD)", value=datetime.date.today().isoformat())
                        generate_calendar_btn = gr.Button("Generate Study Calendar")
                    with gr.Column(scale=2):
                        gr.Markdown("### Your Goals")
                        goals_output = gr.HTML()
                        goal_viz = gr.Plot(label="Goal Progress", visible=False)
                        gr.Markdown("### Your Study Calendar")
                        calendar_output = gr.HTML()
                        calendar_viz = gr.Plot(label="Calendar Visualization", visible=False)

                goal_type.change(
                    fn=lambda gt: gr.update(visible=gt in ["GPA Improvement", "Test Score"]),
                    inputs=goal_type,
                    outputs=goal_target_value
                )

                def update_goals_display(profile_name):
                    """Render the student's goals as HTML cards plus the progress chart."""
                    goals = goal_tracker.get_goals(profile_name)
                    if not goals:
                        return (
                            "<div class='alert-box'>No goals set yet. Add your first goal above!</div>",
                            gr.update(visible=False)
                        )
                    goals_html = []
                    for goal in goals:
                        entries = goal.get('progress') or []
                        latest_value = entries[-1]['value'] if entries else 0
                        target = goal.get('target_value')
                        if target is None:
                            target = "N/A"
                        # Goal text is typed by the user; escape it before
                        # placing it into HTML.
                        note_html = ""
                        if entries:
                            note_html = f"<p><strong>Last Note:</strong> {html.escape(str(entries[-1].get('notes', '')))}</p>"
                        goals_html.append(f"""
                        <div class='goal-card'>
                        <h4>{html.escape(str(goal['description']))}</h4>
                        <p><strong>Type:</strong> {html.escape(str(goal['type']))}</p>
                        <p><strong>Target Date:</strong> {html.escape(str(goal['target_date']))}</p>
                        <p><strong>Progress:</strong> {latest_value} / {target}</p>
                        {note_html}
                        </div>
                        """)
                    return (
                        "\n".join(goals_html),
                        plot_update(goal_tracker.create_goal_visualization(goals))
                    )

                def update_calendar_display(profile_name, start_date_str):
                    """Generate the study calendar and render it as HTML plus a chart."""
                    try:
                        start_date = datetime.date.fromisoformat(start_date_str)
                    except (TypeError, ValueError):
                        return (
                            "<div class='error-message'>Invalid date format. Please use YYYY-MM-DD</div>",
                            gr.update(visible=False)
                        )
                    profile = profile_manager.load_profile(profile_name, session_token.value)
                    if not profile:
                        return (
                            "<div class='alert-box'>Please complete and save your profile first</div>",
                            gr.update(visible=False)
                        )
                    schedule = study_calendar.generate_study_calendar(profile, start_date.isoformat())
                    calendar_html = []
                    current_date = datetime.date.fromisoformat(schedule['start_date'])
                    end_date = datetime.date.fromisoformat(schedule['end_date'])
                    while current_date <= end_date:
                        day_events = [
                            e for e in schedule['events']
                            if datetime.date.fromisoformat(e['date']) == current_date
                        ]
                        day_exams = [
                            e for e in schedule['exams']
                            if datetime.date.fromisoformat(e['date']) == current_date
                        ]
                        if day_events or day_exams:
                            calendar_html.append(f"<h4>{current_date.strftime('%A, %B %d')}</h4>")
                            # Titles are escaped; duration/description are
                            # produced by the calendar generator and left as-is.
                            for event in day_events:
                                calendar_html.append(f"""
                                <div class='calendar-event'>
                                <p><strong>π {html.escape(str(event['title']))}</strong></p>
                                <p>β±οΈ {event['duration']}</p>
                                <p>{event['description']}</p>
                                </div>
                                """)
                            for exam in day_exams:
                                calendar_html.append(f"""
                                <div class='calendar-event' style='border-left-color: #f44336;'>
                                <p><strong>π {html.escape(str(exam['title']))}</strong></p>
                                <p>β° All day</p>
                                <p>Prepare by reviewing materials and practicing problems</p>
                                </div>
                                """)
                        current_date += datetime.timedelta(days=1)
                    return (
                        "\n".join(calendar_html) if calendar_html else "<div class='alert-box'>No study sessions scheduled yet</div>",
                        plot_update(study_calendar.create_calendar_visualization(schedule))
                    )

                def add_goal_and_refresh(student_name, gt, desc, date, val):
                    """Store a new goal for the named student and redraw the goal list."""
                    if not student_name:
                        return (
                            "<div class='alert-box'>Please complete and save your profile first</div>",
                            gr.update(visible=False)
                        )
                    added = goal_tracker.add_goal(student_name, gt, desc, date, val)
                    goals_markup, viz_update = update_goals_display(student_name)
                    if not added:
                        goals_markup = (
                            "<div class='error-message'>Could not save the goal. "
                            "Check that the target date uses YYYY-MM-DD.</div>\n" + goals_markup
                        )
                    return goals_markup, viz_update

                # The name textbox is an input so handlers see what the user
                # typed rather than the component's build-time value.
                add_goal_btn.click(
                    fn=add_goal_and_refresh,
                    inputs=[name, goal_type, goal_description, goal_target_date, goal_target_value],
                    outputs=[goals_output, goal_viz]
                )
                generate_calendar_btn.click(
                    fn=update_calendar_display,
                    inputs=[name, calendar_start_date],
                    outputs=[calendar_output, calendar_viz]
                )

        # ---- Step navigation ----------------------------------------------
        def navigate_to_tab(tab_index: int, tab_completed_status: dict):
            """Switch to ``tab_index`` unless an earlier step is incomplete.

            When a prerequisite is missing, the first incomplete tab is
            selected instead and an explanatory message is shown.
            """
            tab_completed_status = tab_completed_status or {}
            for i in range(tab_index):
                if not tab_completed_status.get(i, False):
                    messages = [
                        "Please complete the transcript analysis first",
                        "Please complete the learning style quiz first",
                        "Please fill out your personal information first",
                        "Please save your profile first",
                        "Please complete the previous steps first"
                    ]
                    return (
                        gr.Tabs(selected=i),
                        gr.update(
                            value=f"<div class='error-message'>β {messages[i]}</div>",
                            visible=True
                        )
                    )
            return gr.Tabs(selected=tab_index), gr.update(visible=False)

        for tab_index, step_button in enumerate((step1, step2, step3, step4, step5, step6)):
            step_button.click(
                navigate_to_tab,
                inputs=[gr.State(tab_index), tab_completed],
                outputs=[tabs, nav_message]
            )

        def toggle_dark_mode(dark):
            """Return the Soft theme variant matching the checkbox state."""
            # NOTE(review): this event has no outputs, so the returned theme
            # is not applied anywhere; the checkbox currently has no visible
            # effect. Left unchanged pending a decision on how to switch modes.
            return gr.themes.Soft(primary_hue="blue", secondary_hue="gray") if not dark else gr.themes.Soft(primary_hue="blue", secondary_hue="gray", neutral_hue="slate")

        dark_mode.change(
            fn=toggle_dark_mode,
            inputs=dark_mode,
            outputs=None
        )

        # Load the language model when the page is opened.
        app.load(fn=lambda: get_model_and_tokenizer(), outputs=[])
    return app
# Build the interface at import time and keep it as the module-level `app`.
app = create_enhanced_interface() | |
# When run as a script, serve the app on all interfaces (0.0.0.0), port 7860.
if __name__ == "__main__": | |
app.launch(server_name="0.0.0.0", server_port=7860) | |