Final_project / app.py
Dannyar608's picture
Update app.py
31a3aab verified
import gradio as gr
import pandas as pd
import json
import os
import re
from PyPDF2 import PdfReader
from collections import defaultdict
from typing import Dict, List, Optional, Tuple, Union
import html
from pathlib import Path
import fitz # PyMuPDF
import pytesseract
from PIL import Image
import io
import secrets
import string
from huggingface_hub import HfApi, HfFolder
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import time
import logging
import asyncio
from functools import lru_cache
import hashlib
from concurrent.futures import ThreadPoolExecutor
from pydantic import BaseModel
import plotly.express as px
import pdfplumber
from io import BytesIO
import base64
import datetime
from cryptography.fernet import Fernet
import calendar
from dateutil.relativedelta import relativedelta
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import random
# Application configuration: profile storage, upload limits, and session settings.
PROFILES_DIR = "student_profiles"
# Lower-case extensions; validate_file() lower-cases the upload's suffix before comparing.
ALLOWED_FILE_TYPES = [".pdf", ".png", ".jpg", ".jpeg"]
MAX_FILE_SIZE_MB = 10
# Inclusive bounds enforced by validate_age().
MIN_AGE = 5
MAX_AGE = 120
# Number of characters produced by generate_session_token().
SESSION_TOKEN_LENGTH = 32
# None when the HF_TOKEN environment variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
# NOTE(review): the default argument is evaluated eagerly, so a fresh Fernet key
# is generated on every start-up. When ENCRYPTION_KEY is not set in the
# environment, that fresh key is the one used, and anything encrypted with it
# cannot be decrypted by a later process. Set ENCRYPTION_KEY for persistent data.
ENCRYPTION_KEY = os.getenv("ENCRYPTION_KEY", Fernet.generate_key().decode())
# Presumably seconds (3 hours) - confirm against the code that enforces it.
SESSION_TIMEOUT = 3600 * 3
MAX_CONTEXT_HISTORY = 10
MAX_PROFILE_LOAD_ATTEMPTS = 3
# Logging: INFO and above are written to transcript_parser.log and to a stream handler.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('transcript_parser.log'),
logging.StreamHandler()
]
)
logger = logging.getLogger(__name__)
# Identifier passed to from_pretrained() in get_model_and_tokenizer().
MODEL_NAME = "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
@lru_cache(maxsize=1)
def get_model_and_tokenizer():
    """Return the ``(model, tokenizer)`` pair for ``MODEL_NAME``.

    The result is memoised by ``lru_cache``, so the weights are loaded at most
    once per process. The model is loaded with ``torch.float16`` weights.

    Raises:
        Exception: whatever ``from_pretrained`` raised; the failure is logged
            and then re-raised unchanged.
    """
    logger.info("Loading model and tokenizer...")
    try:
        loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        loaded_model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
        )
    except Exception as e:
        logger.error(f"Failed to load model: {str(e)}")
        raise
    logger.info("Model and tokenizer loaded successfully")
    return loaded_model, loaded_tokenizer
# Initialize Hugging Face API.
# ``hf_api`` is bound unconditionally so that a reference to it does not raise
# NameError when HF_TOKEN is unset; in that case it simply stays None.
hf_api = None
if HF_TOKEN:
    _HF_INIT_ATTEMPTS = 3
    for attempt in range(_HF_INIT_ATTEMPTS):
        try:
            hf_api = HfApi(token=HF_TOKEN)
            HfFolder.save_token(HF_TOKEN)
            logger.info("Hugging Face API initialized successfully")
            break
        except Exception as e:
            logger.error(f"Attempt {attempt + 1} failed to initialize Hugging Face API: {str(e)}")
            # Exponential back-off between attempts; there is nothing to wait
            # for once the final attempt has failed.
            if attempt < _HF_INIT_ATTEMPTS - 1:
                time.sleep(2 ** attempt)
class DataEncryptor:
    """Encrypt and decrypt text with a Fernet cipher built from a string key."""

    def __init__(self, key: str):
        """Build the cipher from ``key`` (encoded to bytes before use)."""
        key_bytes = key.encode()
        self.cipher = Fernet(key_bytes)

    def encrypt(self, data: str) -> str:
        """Return the Fernet token for ``data`` as text."""
        token = self.cipher.encrypt(data.encode())
        return token.decode()

    def decrypt(self, encrypted_data: str) -> str:
        """Return the plaintext for a token produced by :meth:`encrypt`."""
        plaintext = self.cipher.decrypt(encrypted_data.encode())
        return plaintext.decode()


# Module-wide encryptor keyed by ENCRYPTION_KEY.
encryptor = DataEncryptor(ENCRYPTION_KEY)
def generate_session_token() -> str:
    """Return a random token of ``SESSION_TOKEN_LENGTH`` ASCII letters and digits.

    Characters are drawn with ``secrets.choice``.
    """
    pool = string.ascii_letters + string.digits
    picked = [secrets.choice(pool) for _ in range(SESSION_TOKEN_LENGTH)]
    return ''.join(picked)
def sanitize_input(text: str) -> str:
    """Strip markup and disallowed characters from user-supplied text.

    Processing order matters:

    1. surrounding whitespace is trimmed;
    2. anything that looks like an HTML tag (``<...>``) is removed;
    3. characters outside the whitelist (word characters, whitespace and
       ``-.,!?@#$%^&*()+=``) are dropped;
    4. the result is HTML-escaped, which at this point only affects ``&``.

    Escaping must come last: escaping first turns ``<`` into ``&lt;`` so the
    tag pattern can never match, and the whitelist then strips the ``;`` from
    every entity, leaving fragments such as ``&lt`` in the output.

    Args:
        text: Raw input; ``None`` or an empty string is accepted.

    Returns:
        The sanitized, HTML-escaped string (``""`` for falsy input).
    """
    if not text:
        return ""
    without_tags = re.sub(r'<[^>]*>', '', text.strip())
    allowed_only = re.sub(r'[^\w\s\-.,!?@#\$%^&*()+=]', '', without_tags)
    return html.escape(allowed_only)
def validate_name(name: str) -> str:
    """Return ``name`` with surrounding whitespace removed after validating it.

    Raises:
        ValueError: if the trimmed name is empty, longer than 100 characters,
            or contains any digit.
    """
    trimmed = name.strip()
    if trimmed == "":
        raise ValueError("Name cannot be empty.")
    if len(trimmed) > 100:
        raise ValueError("Name is too long (maximum 100 characters).")
    for ch in trimmed:
        if ch.isdigit():
            raise ValueError("Name cannot contain numbers.")
    return trimmed
def validate_age(age: Union[int, float, str]) -> int:
    """Convert ``age`` to an int and check it lies in ``[MIN_AGE, MAX_AGE]``.

    The conversion and the range check are deliberately separate steps. When
    the range check lived inside the ``try`` block, its ``ValueError`` was
    caught by the conversion handler and replaced with the generic message,
    so callers never saw the allowed range.

    Args:
        age: Value convertible with ``int()`` (floats are truncated).

    Returns:
        The validated age as an int.

    Raises:
        ValueError: "Please enter a valid age number." when ``age`` cannot be
            converted; "Age must be between ..." when it is out of range.
    """
    try:
        age_int = int(age)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers int(float("inf")).
        raise ValueError("Please enter a valid age number.") from None
    if not MIN_AGE <= age_int <= MAX_AGE:
        raise ValueError(f"Age must be between {MIN_AGE} and {MAX_AGE}.")
    return age_int
def validate_file(file_obj) -> None:
    """Check that an uploaded file exists, has an allowed type, and is not too big.

    ``file_obj`` must expose the file's path as ``.name``. The extension is
    compared case-insensitively against ``ALLOWED_FILE_TYPES`` and the on-disk
    size against ``MAX_FILE_SIZE_MB``.

    Raises:
        ValueError: if no file was given, the extension is not allowed, or the
            file exceeds the size limit.
    """
    if not file_obj:
        raise ValueError("Please upload a file first")

    path = file_obj.name
    _, extension = os.path.splitext(path)
    if extension.lower() not in ALLOWED_FILE_TYPES:
        raise ValueError(f"Invalid file type. Allowed types: {', '.join(ALLOWED_FILE_TYPES)}")

    size_in_mb = os.path.getsize(path) / (1024 * 1024)
    if size_in_mb > MAX_FILE_SIZE_MB:
        raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
def validate_date(date_str: str) -> bool:
    """Report whether ``date_str`` is a real calendar date in ``YYYY-MM-DD`` form."""
    try:
        datetime.datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        return False
    else:
        return True
# Redaction rules, applied in order. Order is significant: the broad NAME rule
# (any two capitalised words) must run last, otherwise it rewrites the street
# name inside an address before the ADDRESS rule gets a chance to match.
_SENSITIVE_PATTERNS = (
    (re.compile(r'\b\d{3}-\d{2}-\d{4}\b'), '[REDACTED-SSN]'),
    (re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'), '[EMAIL]'),
    (re.compile(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b'), '[IP]'),
    # Optional parentheses around the area code are consumed as part of the match.
    (re.compile(r'\(?\b\d{3}\)?[ .-]?\d{3}-\d{4}\b'), '[PHONE]'),
    (re.compile(r'\b\d{1,5} [A-Z][a-z]+ [A-Z][a-z]+, [A-Z]{2} \d{5}\b'), '[ADDRESS]'),
    (re.compile(r'\b\d{6,9}\b'), '[ID]'),
    (re.compile(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b'), '[NAME]'),
)


def remove_sensitive_info(text: str) -> str:
    """Replace personally identifying substrings in ``text`` with placeholders.

    Handles SSNs, e-mail addresses, IPv4-like addresses, US phone numbers
    (``(305) 555-1234`` and ``305-555-1234`` forms), simple street addresses,
    6-9 digit identifiers, and pairs of capitalised words (treated as names).

    Fixes relative to the earlier rule list: ADDRESS now runs before NAME so
    it can actually match; the phone rule consumes the opening parenthesis
    instead of leaving a stray ``(``; the e-mail TLD class no longer accepts a
    literal ``|``.

    Args:
        text: Text to redact.

    Returns:
        The redacted text.
    """
    for pattern, replacement in _SENSITIVE_PATTERNS:
        text = pattern.sub(replacement, text)
    return text
class LearningStyleQuiz:
    """Twenty-question learning-style quiz with a Markdown result report.

    Every row of ``options`` lists its four choices in the same column order:
    visual, auditory, reading/writing, kinesthetic. Scoring relies on that
    order rather than on the wording of the choice.
    """

    # Column order shared by every row in ``self.options``.
    _STYLE_ORDER = ('visual', 'auditory', 'reading/writing', 'kinesthetic')

    # Study tips appended to the report for the primary style.
    _STYLE_TIPS = {
        'visual': [
            "- Use color coding in your notes",
            "- Create mind maps and diagrams",
            "- Watch educational videos to visualize concepts",
            "- Highlight or underline important information"
        ],
        'auditory': [
            "- Record lectures and listen to them",
            "- Explain concepts out loud to yourself",
            "- Participate in study groups",
            "- Use rhymes or songs to remember information"
        ],
        'reading/writing': [
            "- Write detailed summaries in your own words",
            "- Create question-answer sets for each topic",
            "- Rewrite your notes to reinforce learning",
            "- Read textbooks and articles on the subject"
        ],
        'kinesthetic': [
            "- Use hands-on activities when possible",
            "- Study while moving or pacing",
            "- Create physical models to represent concepts",
            "- Take frequent short breaks to move around"
        ],
    }

    def __init__(self):
        self.questions = [
            "When learning something new, I prefer to:",
            "I remember information best when I:",
            "When giving directions, I:",
            "When I'm bored, I tend to:",
            "When learning a new skill, I prefer to:",
            "When studying, I like to:",
            "I prefer teachers who:",
            "When solving problems, I:",
            "When working on a group project, I:",
            "My ideal study environment is:",
            "When preparing for a test, I:",
            "When reading instructions, I:",
            "When explaining something to someone, I:",
            "When taking notes in class, I:",
            "When using a new device or app, I:",
            "When remembering names, I:",
            "When choosing a book to read, I:",
            "When giving a presentation, I:",
            "When organizing my work, I:",
            "When relaxing, I enjoy:"
        ]
        self.options = [
            ["See diagrams and charts", "Listen to explanations", "Read about it", "Try it out hands-on"],
            ["See pictures or diagrams", "Hear someone explain it", "Read about it", "Do something physical with it"],
            ["Draw a map", "Give verbal instructions", "Write down directions", "Demonstrate or guide physically"],
            ["Doodle or look around", "Talk to myself or others", "Read or imagine things", "Fidget or move around"],
            ["Watch demonstrations", "Listen to instructions", "Read instructions", "Jump in and try it"],
            ["Use highlighters and diagrams", "Discuss with others", "Read and take notes", "Move around or use objects"],
            ["Use visual aids", "Give interesting lectures", "Provide reading materials", "Include hands-on activities"],
            ["Draw pictures or diagrams", "Talk through options", "Make lists", "Try different solutions physically"],
            ["Create visual plans", "Discuss ideas verbally", "Write detailed plans", "Take on hands-on tasks"],
            ["Somewhere quiet with good lighting", "Somewhere I can discuss ideas", "A library with lots of resources", "Somewhere I can move around"],
            ["Create visual study aids", "Recite information aloud", "Write summaries", "Create physical models"],
            ["Look at diagrams first", "Have someone explain them", "Read them carefully", "Try to follow them as I go"],
            ["Draw diagrams or pictures", "Explain verbally", "Write detailed explanations", "Show by doing"],
            ["Draw diagrams and symbols", "Record lectures to listen later", "Write detailed notes", "Underline and highlight"],
            ["Look at the screen layout", "Listen to audio instructions", "Read the manual", "Start clicking buttons"],
            ["Remember faces better than names", "Remember names when I hear them", "Remember names when I see them written", "Remember people by activities we did"],
            ["Choose books with pictures/diagrams", "Choose audiobooks", "Choose text-heavy books", "Choose interactive books"],
            ["Use lots of visual aids", "Focus on my verbal delivery", "Provide handouts", "Use props or demonstrations"],
            ["Use color-coding systems", "Talk through my plan", "Make detailed lists", "Physically arrange materials"],
            ["Watching videos or art", "Listening to music/podcasts", "Reading", "Doing physical activities"]
        ]
        self.learning_styles = {
            'visual': "**Visual** learners prefer seeing information in charts, diagrams, and pictures.",
            'auditory': "**Auditory** learners prefer hearing information spoken and learn best through lectures and discussions.",
            'reading/writing': "**Reading/Writing** learners prefer information displayed as words and learn best through reading and note-taking.",
            'kinesthetic': "**Kinesthetic** learners prefer physical experience and learn best through hands-on activities and movement."
        }

    @staticmethod
    def _style_from_keywords(answer):
        """Guess a style from wording; used only for text not found in ``options``.

        Returns the style key, or ``None`` when no keyword matches.
        """
        lowered = answer.lower()
        if answer.startswith(("See", "Draw", "Watch")) or "diagram" in lowered:
            return 'visual'
        if answer.startswith(("Listen", "Hear", "Talk")) or "lecture" in lowered:
            return 'auditory'
        if answer.startswith("Read") or "note" in lowered or "write" in lowered:
            return 'reading/writing'
        if answer.startswith("Try") or "physical" in lowered or "hands-on" in lowered:
            return 'kinesthetic'
        return None

    def _style_for_answer(self, answer):
        """Map one selected answer to its learning style.

        The answer is looked up in ``self.options``; its column index gives the
        style. Keyword matching alone left many of the quiz's own choices
        uncounted (for example "Discuss with others" or "Jump in and try it").
        """
        answer = str(answer)
        for choices in self.options:
            if answer in choices:
                return self._STYLE_ORDER[choices.index(answer)]
        return self._style_from_keywords(answer)

    def evaluate_quiz(self, *answers):
        """Evaluate quiz answers and determine learning style.

        Args:
            *answers: The selected option text for each question.

        Returns:
            A Markdown report naming the primary style, tips for it, and the
            next two styles by count. Ties go to the style that comes first in
            visual / auditory / reading-writing / kinesthetic order.

        Raises:
            gr.Error: if no answers were given or any answer is ``None``.
        """
        if not answers or any(a is None for a in answers):
            raise gr.Error("Please answer all questions before submitting")

        style_counts = {style: 0 for style in self._STYLE_ORDER}
        for answer in answers:
            style = self._style_for_answer(answer)
            if style is not None:
                style_counts[style] += 1

        primary_style = max(style_counts, key=style_counts.get)
        # sorted() is stable, so position 0 is the primary style; take the next two.
        secondary_styles = sorted(style_counts.items(), key=lambda x: x[1], reverse=True)[1:3]

        result = [
            "## 🎯 Your Learning Style Results",
            f"Your primary learning style is **{primary_style.capitalize()}**",
            self.learning_styles[primary_style],
            "",
            "### Tips for Your Learning Style:"
        ]
        result.extend(self._STYLE_TIPS[primary_style])
        result.extend([
            "",
            "### Secondary Learning Styles:",
            f"1. {secondary_styles[0][0].capitalize()}",
            f"2. {secondary_styles[1][0].capitalize()}"
        ])
        return "\n".join(result)


# Initialize learning style quiz
learning_style_quiz = LearningStyleQuiz()
class MiamiDadeTranscriptParser:
    """Extract structured data from the text of a Miami-Dade transcript PDF."""

    def __init__(self):
        # Compiled once; each pattern targets one section of the transcript text.
        self.patterns = {
            'student_info': re.compile(
                r"LEGAL NAME:\s*([^\n]+?)\s*MAILING\s+ADDRESS:.*?"
                r"GRADE LEVEL:\s*(\d+).*?"
                r"FL STUDENT ID:\s*(\w+).*?"
                r"CURRENT SCHOOL:\s*(\d+\s+[^\n]+?)\s*\(",
                re.DOTALL
            ),
            'gpa': re.compile(
                r"DISTRICT:\s*([\d.]+).*?STATE:\s*([\d.]+)",
                re.DOTALL
            ),
            'credits': re.compile(
                r"\*\s+([A-Z\s/]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s*\*",
                re.DOTALL
            ),
            'course': re.compile(
                r"(\d)\s+(\w+)\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([A-Z])\s+([A-Z])\s+([\d.]+)\s+([\d.]+)",
                re.DOTALL
            ),
            'assessment': re.compile(
                r"ENGLISH/LANGUAGE ARTS:\s*(\d{2}/\d{4})|"
                r"ALGEBRA I ASSESSMENT REQUIREMENT MET:\s*(YES|NO)|"
                r"BIOLOGY ASSESSMENT PASSED|"
                r"DISTRICT COMM/VOL SERVICE RQMT MET:\s*(YES).*?HRS:\s*(\d+)",
                re.DOTALL
            ),
            'class_rank': re.compile(
                r"\*\s+PERCENTILE:\s*(\d+)\s*\*\s*TOTAL NUMBER IN CLASS:\s*(\d+)",
                re.DOTALL
            ),
            'course_alt': re.compile(
                r"(\d)\s+(\w+)\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([A-Z])\s+([\d.]+)\s+([\d.]+)",
                re.DOTALL
            )
        }

    @staticmethod
    def _is_pdf_syntax_error(exc: BaseException) -> bool:
        """Return True when ``exc`` is (a subclass of) a class named PDFSyntaxError.

        NOTE(review): pdfplumber does not appear to export ``PDFSyntaxError``
        at package level (the class comes from pdfminer), so
        ``except pdfplumber.PDFSyntaxError`` would itself raise AttributeError
        while handling any failure. Matching on the class name avoids that
        attribute lookup and a direct pdfminer import.
        """
        return any(cls.__name__ == "PDFSyntaxError" for cls in type(exc).__mro__)

    def parse_transcript(self, file_path: str) -> Dict:
        """Parse Miami-Dade transcript PDF with multiple extraction methods.

        Text is extracted with pdfplumber first; when fewer than 500 characters
        come back, extraction is retried with PyMuPDF.

        Args:
            file_path: Path of the PDF to read.

        Returns:
            The dictionary built by :meth:`_parse_miami_dade_format`.

        Raises:
            ValueError: for an invalid PDF, or for any other failure during
                extraction or parsing (the original error is chained).
        """
        try:
            # First try pdfplumber with progress bar
            page_texts = []
            with pdfplumber.open(file_path) as pdf:
                with tqdm(total=len(pdf.pages), desc="Processing transcript") as pbar:
                    for page in pdf.pages:
                        # extract_text() may yield None for a page without a
                        # text layer; treat that as an empty page.
                        page_texts.append(page.extract_text() or "")
                        pbar.update(1)
            text = "".join(f"{page_text}\n" for page_text in page_texts)

            # Fallback to PyMuPDF if text extraction is poor
            if len(text) < 500:
                logger.warning("Low text extraction with pdfplumber, trying PyMuPDF")
                # Context manager closes the document handle.
                with fitz.open(file_path) as doc:
                    text = "".join(page.get_text() for page in doc)

            return self._parse_miami_dade_format(text)
        except Exception as e:
            if self._is_pdf_syntax_error(e):
                error_msg = "Invalid PDF file. Please ensure you're uploading a valid transcript PDF."
                logger.error(f"{error_msg}: {str(e)}")
                raise ValueError(
                    f"{error_msg} If the problem persists, try converting the file to a different format."
                ) from e
            logger.error(f"Error parsing transcript: {str(e)}")
            raise ValueError(f"Error processing transcript: {str(e)}") from e

    def _parse_miami_dade_format(self, text: str) -> Dict:
        """Parse the specific Miami-Dade transcript format.

        Raises:
            ValueError: when no student info or no courses could be extracted.
        """
        parsed_data = {
            'student_info': self._parse_student_info(text),
            'academic_summary': self._parse_academic_summary(text),
            'course_history': self._parse_courses(text),
            'assessments': self._parse_assessments(text),
            'format': 'miami_dade_v3'
        }
        # Validate we got at least some data
        if not parsed_data['student_info'] or not parsed_data['course_history']:
            raise ValueError("Incomplete data extracted from transcript")
        return parsed_data

    def _parse_student_info(self, text: str) -> Dict:
        """Extract student information; returns ``{}`` when the header is not found."""
        match = self.patterns['student_info'].search(text)
        if not match:
            return {}
        # The pattern has exactly four groups, so all are present on a match.
        name, grade, student_id, school = match.groups()
        return {
            'name': name.strip(),
            'grade': grade,
            'student_id': student_id,
            'school': school.strip(),
            'birth_date': self._extract_birth_date(text),
            'ethnicity': self._extract_ethnicity(text)
        }

    def _extract_birth_date(self, text: str) -> Optional[str]:
        """Extract birth date (``MM/DD/YYYY`` as printed) or ``None``."""
        birth_match = re.search(r"BIRTH DATE:\s*(\d{2}/\d{2}/\d{4})", text)
        return birth_match.group(1) if birth_match else None

    def _extract_ethnicity(self, text: str) -> Optional[str]:
        """Extract the rest of the ``ETHNICITY:`` line, or ``None``."""
        eth_match = re.search(r"ETHNICITY:\s*([^\n]+)", text)
        return eth_match.group(1).strip() if eth_match else None

    def _parse_academic_summary(self, text: str) -> Dict:
        """Parse GPA, per-subject credits, and class rank."""
        summary = {
            'gpa': {'district': None, 'state': None},
            'credits': {},
            'class_rank': {'percentile': None, 'class_size': None}
        }

        # GPA
        gpa_match = self.patterns['gpa'].search(text)
        if gpa_match:
            summary['gpa']['district'] = float(gpa_match.group(1))
            summary['gpa']['state'] = float(gpa_match.group(2))

        # Credits: one entry per "* SUBJECT earned required remaining *" row
        for match in self.patterns['credits'].finditer(text):
            subject = match.group(1).strip()
            summary['credits'][subject] = {
                'earned': float(match.group(2)),
                'required': float(match.group(3)),
                'remaining': float(match.group(4))
            }

        # Class Rank
        rank_match = self.patterns['class_rank'].search(text)
        if rank_match:
            summary['class_rank']['percentile'] = int(rank_match.group(1))
            summary['class_rank']['class_size'] = int(rank_match.group(2))

        return summary

    def _parse_courses(self, text: str) -> List[Dict]:
        """Parse course history section.

        Rows with three single-letter columns are tried first; if none match,
        rows with two single-letter columns are tried.
        """
        # Try primary pattern first
        courses = [
            {
                'term': match.group(1),
                'course_code': match.group(2),
                'course_title': match.group(3).strip(),
                'subject_area': match.group(4),
                'grade': match.group(5),
                'flag': match.group(6),
                'credit_status': match.group(7),
                'credit_attempted': float(match.group(8)),
                'credit_earned': float(match.group(9))
            }
            for match in self.patterns['course'].finditer(text)
        ]
        if courses:
            return courses

        # If no courses found, try alternative pattern.
        # 'course_alt' has 8 groups: the credit numbers are groups 7 and 8.
        # Group 6 is a single letter (reading it as a number raised ValueError).
        # NOTE(review): it is unclear whether group 6 is the flag or the credit
        # status in this layout, so it is not exposed - confirm on a sample.
        return [
            {
                'term': match.group(1),
                'course_code': match.group(2),
                'course_title': match.group(3).strip(),
                'subject_area': match.group(4),
                'grade': match.group(5),
                'credit_attempted': float(match.group(7)),
                'credit_earned': float(match.group(8))
            }
            for match in self.patterns['course_alt'].finditer(text)
        ]

    def _parse_assessments(self, text: str) -> Dict:
        """Parse assessment and requirement information."""
        assessments = {
            'ela_passed_date': None,
            'algebra_passed': False,
            'biology_passed': False,
            'community_service': {
                'met': False,
                'hours': 0
            }
        }
        for match in self.patterns['assessment'].finditer(text):
            matched_text = match.group(0)
            if match.group(1):  # ELA date
                assessments['ela_passed_date'] = match.group(1)
            elif match.group(2):  # Algebra
                assessments['algebra_passed'] = match.group(2) == "YES"
            elif "BIOLOGY" in matched_text:
                assessments['biology_passed'] = True
            elif "SERVICE" in matched_text:
                assessments['community_service'] = {
                    'met': True,
                    'hours': int(match.group(4)) if match.group(4) else 0
                }
        return assessments


# Initialize the parser
transcript_parser = MiamiDadeTranscriptParser()
class AcademicAnalyzer:
    """Derives GPA, graduation, course-rigor and college guidance from parsed transcripts."""

    def __init__(self):
        """Set up the letter-grade point table and the per-tier admission thresholds."""
        # Letter grade -> grade points.
        letter_points = [
            ('A', 4.0), ('A-', 3.7), ('B+', 3.3), ('B', 3.0), ('B-', 2.7),
            ('C+', 2.3), ('C', 2.0), ('C-', 1.7), ('D+', 1.3), ('D', 1.0), ('F', 0.0),
        ]
        self.gpa_scale = dict(letter_points)

        # Tier -> minimum GPA, number of advanced courses ('rigor'), service hours.
        tier_thresholds = [
            ('ivy_league', 4.3, 8, 100),
            ('top_tier', 4.0, 6, 80),
            ('competitive', 3.7, 4, 60),
            ('good', 3.3, 2, 40),
            ('average', 2.7, 1, 20),
        ]
        self.college_tiers = {
            tier: {'gpa': gpa, 'rigor': rigor, 'service': service}
            for tier, gpa, rigor, service in tier_thresholds
        }
def analyze_gpa(self, parsed_data: Dict) -> Dict:
    """Rate the student's weighted GPA and suggest improvements.

    The GPA location depends on ``parsed_data['format']``:

    * ``'progress_summary'``: ``student_info.weighted_gpa`` / ``unweighted_gpa``
    * ``'miami_dade_v3'``: ``academic_summary.gpa.district`` (used as weighted)
      and ``.state`` (used as unweighted)
    * anything else: ``academic_summary.gpa`` is read as a single number.

    The emoji prefixes in the descriptions were stored as mis-decoded UTF-8
    (e.g. ``πŸ‘``); they are restored to the intended characters here.

    Args:
        parsed_data: Parsed transcript dictionary.

    Returns:
        Dict with ``rating``, ``description``, ``comparison`` and
        ``improvement_tips``. If the GPA cannot be read as a number, a
        fallback result with rating ``'Unknown'`` is returned and the error is
        logged.
    """
    analysis = {
        'rating': '',
        'description': '',
        'comparison': '',
        'improvement_tips': []
    }
    try:
        # Handle multiple transcript formats
        transcript_format = parsed_data.get('format')
        if transcript_format == 'progress_summary':
            student_info = parsed_data.get('student_info', {})
            weighted_gpa = float(student_info.get('weighted_gpa', 0))
            unweighted_gpa = float(student_info.get('unweighted_gpa', 0))
        elif transcript_format == 'miami_dade_v3':
            gpa_info = parsed_data.get('academic_summary', {}).get('gpa', {})
            weighted_gpa = float(gpa_info.get('district', 0))
            unweighted_gpa = float(gpa_info.get('state', 0))
        else:  # Alternative format
            weighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', 0))
            unweighted_gpa = weighted_gpa  # Assume same if not specified

        if weighted_gpa >= 4.5:
            analysis['rating'] = 'Excellent'
            analysis['description'] = "🌟 You're in the top tier of students with a highly competitive GPA."
            analysis['comparison'] = "This puts you in the top 5% of students nationally."
            analysis['improvement_tips'] = [
                "Consider taking advanced courses to challenge yourself",
                "Look into college-level courses or research opportunities"
            ]
        elif weighted_gpa >= 4.0:
            analysis['rating'] = 'Strong'
            analysis['description'] = "👍 Your GPA is strong and competitive for most colleges."
            analysis['comparison'] = "This is above the national average and competitive for many universities."
            analysis['improvement_tips'] = [
                "Maintain your current study habits",
                "Consider adding 1-2 more challenging courses"
            ]
        elif weighted_gpa >= 3.5:
            analysis['rating'] = 'Good'
            analysis['description'] = "ℹ️ Your GPA is good but could be improved for more competitive schools."
            analysis['comparison'] = "This is slightly above the national average."
            analysis['improvement_tips'] = [
                "Focus on improving in your weaker subjects",
                "Consider getting tutoring for challenging courses",
                "Develop better study habits and time management"
            ]
        elif weighted_gpa >= 3.0:
            analysis['rating'] = 'Average'
            analysis['description'] = "⚠️ Your GPA is average. Focus on improvement for better college options."
            analysis['comparison'] = "This is around the national average."
            analysis['improvement_tips'] = [
                "Identify your weakest subjects and focus on them",
                "Develop a consistent study schedule",
                "Seek help from teachers or tutors",
                "Consider retaking courses with low grades if possible"
            ]
        else:
            analysis['rating'] = 'Below Average'
            analysis['description'] = "❌ Your GPA is below average. Please consult with your academic advisor."
            analysis['comparison'] = "This is below the national average and may limit college options."
            analysis['improvement_tips'] = [
                "Meet with your school counselor immediately",
                "Develop a structured improvement plan",
                "Consider summer school or credit recovery options",
                "Focus on fundamental study skills"
            ]

        # The weighted/unweighted gap is used as a rough signal of course difficulty.
        if weighted_gpa > 0 and unweighted_gpa > 0:
            diff = weighted_gpa - unweighted_gpa
            if diff > 0.5:
                analysis['comparison'] += "\n\nThe significant difference between your weighted and unweighted GPA suggests you're taking many advanced courses."
            elif diff > 0.2:
                analysis['comparison'] += "\n\nThe moderate difference between your weighted and unweighted GPA suggests a good balance of standard and advanced courses."
            else:
                analysis['comparison'] += "\n\nThe small difference between your weighted and unweighted GPA suggests you might benefit from more challenging courses."
        return analysis
    except Exception as e:
        logger.error(f"GPA analysis error: {str(e)}")
        return {
            'rating': 'Unknown',
            'description': 'Could not analyze GPA - data may be missing or incomplete',
            'comparison': 'Please verify your transcript contains GPA information',
            'improvement_tips': [
                "Check that your transcript includes GPA information",
                "Ensure the file is clear and all text was extracted properly"
            ]
        }
def analyze_graduation_status(self, parsed_data: Dict) -> Dict:
    """Summarise progress toward graduation from credit/requirement totals.

    For ``format == 'progress_summary'`` totals come from
    ``parsed_data['requirements']`` (``required`` / ``completed`` per entry);
    otherwise from ``parsed_data['academic_summary']['credits']``
    (``required`` / ``earned`` per subject). Values that cannot be read as
    numbers count as 0.

    Defects corrected here:

    * the progress-summary path referenced an undefined ``text`` variable, so
      it always fell through to the error result; the percentage is now always
      computed from the requirement totals;
    * the timeline used a variable that only existed on the other path;
    * years remaining subtracted the grade level from a year difference, which
      is negative for any realistic input; it is now graduation year minus the
      current year (a graduation year equal to the current year counts as one
      year of work left);
    * emoji prefixes stored as mis-decoded UTF-8 are restored.

    Args:
        parsed_data: Parsed transcript dictionary.

    Returns:
        Dict with ``status``, ``completion_percentage``,
        ``missing_requirements``, ``on_track`` and ``timeline``. On an
        unexpected error a fallback result is returned and the error is logged.
    """
    analysis = {
        'status': '',
        'completion_percentage': 0,
        'missing_requirements': [],
        'on_track': False,
        'timeline': ''
    }

    def _num(value, default=0.0):
        # Tolerant numeric read: None, '' or malformed text become `default`.
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    try:
        if parsed_data.get('format') == 'progress_summary':
            requirements = parsed_data.get('requirements', {}) or {}
            # (code, entry, required, completed) for every non-empty entry
            rows = [
                (code, req, _num(req.get('required')), _num(req.get('completed')))
                for code, req in requirements.items()
                if req
            ]
            total_required = sum(required for _, _, required, _ in rows)
            total_earned = sum(completed for _, _, _, completed in rows)
            analysis['missing_requirements'] = [
                {
                    'code': code,
                    'description': req.get('description', ''),
                    'remaining': max(0, required - completed),
                    'status': req.get('status', '')
                }
                for code, req, required, completed in rows
                if completed < required
            ]
        else:
            credits = parsed_data.get('academic_summary', {}).get('credits', {}) or {}
            rows = [
                (subject, info, _num(info.get('required')), _num(info.get('earned')))
                for subject, info in credits.items()
                if info
            ]
            total_required = sum(required for _, _, required, _ in rows)
            total_earned = sum(earned for _, _, _, earned in rows)
            analysis['missing_requirements'] = [
                {
                    'subject': subject,
                    'earned': info.get('earned', 0),
                    'required': info.get('required', 0),
                    'remaining': max(0, required - earned)
                }
                for subject, info, required, earned in rows
                if required > earned
            ]

        analysis['completion_percentage'] = (
            (total_earned / total_required) * 100 if total_required > 0 else 0
        )
        pct = analysis['completion_percentage']

        if pct >= 100:
            analysis['status'] = "🎉 Congratulations! You've met all graduation requirements."
            analysis['on_track'] = True
        elif pct >= 90:
            analysis['status'] = f"✅ You've completed {pct:.1f}% of requirements. Almost there!"
            analysis['on_track'] = True
        elif pct >= 75:
            analysis['status'] = f"🔄 You've completed {pct:.1f}% of requirements. Keep working!"
            analysis['on_track'] = True
        elif pct >= 50:
            analysis['status'] = f"⚠️ You've completed {pct:.1f}% of requirements. Please meet with your counselor."
            analysis['on_track'] = False
        else:
            analysis['status'] = f"❌ You've only completed {pct:.1f}% of requirements. Immediate action needed."
            analysis['on_track'] = False

        grad_year = parsed_data.get('student_info', {}).get('year_of_graduation', '')
        remaining_credits = total_required - total_earned
        if grad_year and remaining_credits > 0:
            try:
                years_remaining = int(grad_year) - datetime.datetime.now().year
            except (TypeError, ValueError):
                years_remaining = -1  # unreadable year: no timeline
            if years_remaining >= 0:
                credits_per_year = remaining_credits / max(years_remaining, 1)
                analysis['timeline'] = (
                    f"To graduate on time in {grad_year}, you need to complete approximately "
                    f"{credits_per_year:.1f} credits per year."
                )
        return analysis
    except Exception as e:
        logger.error(f"Graduation status error: {str(e)}")
        return {
            'status': 'Could not analyze graduation status - data may be incomplete',
            'completion_percentage': 0,
            'missing_requirements': [],
            'on_track': False,
            'timeline': 'Please verify your transcript contains credit information'
        }
def analyze_course_rigor(self, parsed_data: Dict) -> Dict:
    """Count advanced courses and rate the overall rigor of the course history.

    Each course title (``description`` or, failing that, ``course_title``) is
    upper-cased and classified as AP, IB, dual-enrollment or honors; a course
    with ``flag == 'H'`` also counts as honors. The abbreviations ``AP``,
    ``IB`` and ``DE`` are matched as whole words only: plain substring tests
    counted titles such as "GRAPHIC DESIGN" or "LIBERAL ARTS MATH" as
    advanced.

    Args:
        parsed_data: Parsed transcript dictionary with a ``course_history`` list.

    Returns:
        Dict with per-category counts, ``advanced_courses``, a ``rating`` and
        ``recommendations``. With no courses the counts are 0 and the rating
        is ``''``. On an unexpected error a fallback result with rating
        ``'Unknown'`` is returned and the error is logged.
    """
    analysis = {
        'advanced_courses': 0,
        'honors_courses': 0,
        'ap_courses': 0,
        'ib_courses': 0,
        'de_courses': 0,
        'rating': '',
        'recommendations': []
    }
    # Checked in this order; the first matching category wins.
    ap_pattern = re.compile(r'\bAP\b|ADVANCED PLACEMENT')
    ib_pattern = re.compile(r'\bIB\b|INTERNATIONAL BACCALAUREATE')
    de_pattern = re.compile(r'\bDE\b|DUAL ENROLLMENT|COLLEGE')
    try:
        courses = parsed_data.get('course_history', [])
        for course in courses:
            # `or ''` keeps a missing/None title from breaking .upper().
            course_title = (course.get('description') or course.get('course_title') or '').upper()
            if ap_pattern.search(course_title):
                category = 'ap_courses'
            elif ib_pattern.search(course_title):
                category = 'ib_courses'
            elif de_pattern.search(course_title):
                category = 'de_courses'
            elif 'HONORS' in course_title or course.get('flag', '') == 'H':
                category = 'honors_courses'
            else:
                continue
            analysis[category] += 1
            analysis['advanced_courses'] += 1

        total_courses = len(courses)
        if total_courses == 0:
            return analysis

        advanced_percentage = (analysis['advanced_courses'] / total_courses) * 100
        if advanced_percentage >= 50:
            analysis['rating'] = 'Very High Rigor'
            analysis['recommendations'] = [
                "Your course rigor is excellent for college admissions",
                "Consider adding 1-2 more advanced courses if manageable"
            ]
        elif advanced_percentage >= 30:
            analysis['rating'] = 'High Rigor'
            analysis['recommendations'] = [
                "Your course rigor is strong",
                "Consider adding 1-2 more advanced courses next year"
            ]
        elif advanced_percentage >= 15:
            analysis['rating'] = 'Moderate Rigor'
            analysis['recommendations'] = [
                "Your course rigor is average",
                "Consider adding more advanced courses to strengthen your profile"
            ]
        else:
            analysis['rating'] = 'Low Rigor'
            analysis['recommendations'] = [
                "Your course rigor is below average for college-bound students",
                "Strongly consider adding advanced courses next semester",
                "Meet with your counselor to discuss options"
            ]
        return analysis
    except Exception as e:
        logger.error(f"Course rigor error: {str(e)}")
        return {
            'advanced_courses': 0,
            'honors_courses': 0,
            'ap_courses': 0,
            'ib_courses': 0,
            'de_courses': 0,
            'rating': 'Unknown',
            'recommendations': [
                "Could not analyze course rigor - verify your transcript contains course information",
                "Check that course titles and types were properly extracted"
            ]
        }
def generate_college_recommendations(self, parsed_data: Dict) -> Dict:
    """Suggest reach/target/safety schools, scholarships and areas to improve.

    Inputs are the weighted GPA, the number of advanced courses reported by
    ``self.analyze_course_rigor`` and community-service hours. The first tier
    whose three minimums are all met supplies the school lists; scholarships
    depend on GPA alone.

    Args:
        parsed_data: Parsed transcript dictionary.

    Returns:
        Dict with ``reach``, ``target``, ``safety``, ``scholarships`` and
        ``improvement_areas`` lists. On an unexpected error a fallback result
        is returned and the error is logged.
    """
    recommendations = {
        'reach': [],
        'target': [],
        'safety': [],
        'scholarships': [],
        'improvement_areas': []
    }
    # (min GPA, min advanced courses, min service hours, school lists), best tier first.
    school_tiers = (
        (4.3, 8, 100, {
            'reach': [
                "Ivy League: Harvard, Yale, Princeton, Columbia, etc.",
                "Stanford, MIT, CalTech, University of Chicago"
            ],
            'target': [
                "Top Public Universities: UCLA, UC Berkeley, UMich, UVA",
                "Elite Liberal Arts: Williams, Amherst, Swarthmore"
            ],
        }),
        (4.0, 6, 80, {
            'reach': [
                "Top 20 National Universities",
                "Highly Selective Liberal Arts Colleges"
            ],
            'target': [
                "Top 50 National Universities",
                "Selective Public Flagships",
                "Top Liberal Arts Colleges"
            ],
        }),
        (3.7, 4, 60, {
            'reach': [
                "Top 50 National Universities",
                "Selective Liberal Arts Colleges"
            ],
            'target': [
                "State Flagship Universities",
                "Good Regional Universities"
            ],
        }),
        (3.3, 2, 40, {
            'target': [
                "State Universities",
                "Many Private Colleges"
            ],
            'safety': [
                "Less Selective Private Colleges",
                "Community Colleges with Transfer Programs"
            ],
        }),
    )
    default_schools = {
        'target': [
            "Open Admission Colleges",
            "Some State Universities"
        ],
        'safety': [
            "Community Colleges",
            "Technical Schools"
        ],
    }
    # (min GPA, scholarships), highest first; only the first match applies.
    scholarship_tiers = (
        (4.0, [
            "National Merit Scholarship",
            "Presidential Scholarships",
            "College-Specific Full-Ride Scholarships"
        ]),
        (3.7, [
            "Bright Futures (Florida)",
            "State-Specific Merit Scholarships",
            "Honors College Scholarships"
        ]),
        (3.3, [
            "Local Community Scholarships",
            "Special Interest Scholarships",
            "First-Generation Student Programs"
        ]),
    )
    try:
        if parsed_data.get('format') == 'progress_summary':
            info = parsed_data.get('student_info', {})
            weighted_gpa = float(info.get('weighted_gpa', 0))
            service_hours = int(info.get('community_service_hours', 0))
        else:
            weighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', {}).get('district', 0))
            service_hours = int(parsed_data.get('assessments', {}).get('community_service', {}).get('hours', 0))

        advanced_count = self.analyze_course_rigor(parsed_data)['advanced_courses']

        chosen_schools = default_schools
        for min_gpa, min_advanced, min_hours, schools in school_tiers:
            if weighted_gpa >= min_gpa and advanced_count >= min_advanced and service_hours >= min_hours:
                chosen_schools = schools
                break
        for bucket, names in chosen_schools.items():
            recommendations[bucket].extend(names)

        for min_gpa, awards in scholarship_tiers:
            if weighted_gpa >= min_gpa:
                recommendations['scholarships'].extend(awards)
                break

        areas = recommendations['improvement_areas']
        if weighted_gpa < 3.5:
            areas.append("Improve GPA through focused study and tutoring")
        if advanced_count < 4:
            areas.append("Take more advanced courses (AP/IB/DE/Honors)")
        if service_hours < 50:
            areas.append("Increase community service involvement")
        return recommendations
    except Exception as e:
        logger.error(f"College recommendations error: {str(e)}")
        return {
            'reach': ["Could not generate recommendations - insufficient data"],
            'target': [],
            'safety': [],
            'scholarships': [],
            'improvement_areas': [
                "Complete your profile information",
                "Ensure your transcript contains GPA and course information"
            ]
        }
def generate_study_plan(self, parsed_data: Dict, learning_style: str) -> Dict:
    """Build a weekly study plan from current courses and a learning style.

    Args:
        parsed_data: Parsed transcript. Only ``course_history`` (a list of
            course dicts) is read. A course counts as current when its
            ``status`` is "in progress" (case-insensitive) or its
            ``credit_earned`` is a float equal to 0.
        learning_style: Style name, matched case-insensitively (surrounding
            whitespace ignored) against ``visual``, ``auditory``,
            ``reading/writing`` and ``kinesthetic``. Any other value,
            including ``None``, adds no style-specific strategies.

    Returns:
        Dict with ``weekly_schedule`` (day name -> list of session dicts),
        ``study_strategies``, ``time_management_tips`` and
        ``resource_recommendations``. If building the plan fails, the error
        is logged and a generic fallback plan is returned whose
        ``weekly_schedule`` holds a single ``'Error'`` entry.
    """
    strategies_by_style = {
        'visual': [
            "Create colorful mind maps for each subject",
            "Use flashcards with images and diagrams",
            "Watch educational videos on topics",
        ],
        'auditory': [
            "Record yourself explaining concepts and listen back",
            "Participate in study groups",
            "Listen to educational podcasts",
        ],
        'reading/writing': [
            "Write detailed summaries in your own words",
            "Create question-answer sets for each topic",
            "Rewrite your notes to reinforce learning",
        ],
        'kinesthetic': [
            "Create physical models or demonstrations",
            "Study while walking or pacing",
            "Use hands-on activities when possible",
        ],
    }
    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    plan = {
        'weekly_schedule': {},
        'study_strategies': [],
        'time_management_tips': [],
        'resource_recommendations': []
    }
    try:
        current_courses = []
        for course in parsed_data.get('course_history') or []:
            status = course.get('status')
            credit = course.get('credit_earned')
            # A missing/None status must not abort the whole plan.
            in_progress = isinstance(status, str) and status.strip().lower() == 'in progress'
            no_credit_yet = isinstance(credit, float) and credit == 0
            if in_progress or no_credit_yet:
                current_courses.append(course)

        plan['weekly_schedule'] = {day: [] for day in days}

        style_key = learning_style.strip().lower() if isinstance(learning_style, str) else ''
        plan['study_strategies'].extend(strategies_by_style.get(style_key, []))

        # Courses are spread round-robin over the five weekdays only.
        for i, course in enumerate(current_courses):
            day = days[i % 5]
            course_name = course.get('description') or course.get('course_title', 'Course')
            plan['weekly_schedule'][day].append({
                'course': course_name,
                'duration': '45-60 minutes',
                'activities': [
                    "Review notes",
                    "Complete practice problems",
                    "Prepare questions for teacher"
                ]
            })

        plan['time_management_tips'].extend([
            "Use the Pomodoro technique (25 min study, 5 min break)",
            "Prioritize assignments by due date and importance",
            "Schedule regular review sessions"
        ])
        plan['resource_recommendations'].extend([
            "Khan Academy for math and science",
            "Quizlet for flashcards",
            "Wolfram Alpha for math help"
        ])
        return plan
    except Exception as e:
        logger.error(f"Study plan error: {str(e)}")
        return {
            'weekly_schedule': {'Error': ["Could not generate schedule - course data may be missing"]},
            'study_strategies': [
                "Review your notes regularly",
                "Create a consistent study routine",
                "Ask teachers for clarification when needed"
            ],
            'time_management_tips': [
                "Set aside dedicated study time each day",
                "Break large tasks into smaller chunks",
                "Use a planner to track assignments"
            ],
            'resource_recommendations': [
                "Khan Academy",
                "Quizlet",
                "Your textbook and class materials"
            ]
        }
# Module-level AcademicAnalyzer instance shared by the rest of this file
# (DataVisualizer and EnhancedProfileManager below call its methods).
academic_analyzer = AcademicAnalyzer()
class DataVisualizer:
    """Build Plotly figures that summarize a parsed transcript.

    Every ``create_*`` method returns a Plotly figure, or ``None`` when there
    is nothing to plot or when building the figure fails (the error is
    logged rather than raised).
    """

    # Requirement-code prefix (text before the first '-') -> credit bucket,
    # used for transcripts whose 'format' is 'progress_summary'.
    _CODE_PREFIX_BUCKETS = {
        'A': 'core', 'B': 'core', 'C': 'core', 'D': 'core',
        'G': 'electives', 'H': 'electives',
        'E': 'arts_pe', 'F': 'arts_pe',
    }
    # First word of the subject name -> credit bucket, for other formats.
    _SUBJECT_BUCKETS = {
        'ENGLISH': 'core', 'ALGEBRA1': 'core', 'GEOMETRY': 'core',
        'MATHEMATICS': 'core', 'BIOLOGY': 'core', 'SCIENCE': 'core',
        'ELECTIVE': 'electives', 'WORLD': 'electives',
        'ARTS': 'arts_pe', 'PHYSICAL': 'arts_pe', 'PERFORMING': 'arts_pe',
    }
    _RIGOR_COLORS = {
        "AP": "#E91E63",
        "IB": "#673AB7",
        "DE": "#009688",
        "Honors": "#FF9800",
    }

    def __init__(self):
        self.color_palette = {
            'complete': '#4CAF50',
            'incomplete': '#F44336',
            'in_progress': '#FFC107',
            'gpa_weighted': '#3F51B5',
            'gpa_unweighted': '#9C27B0',
            'core': '#3498DB',
            'electives': '#2ECC71',
            'arts_pe': '#9B59B6'
        }

    @staticmethod
    def _to_float(value, default: float = 0.0) -> float:
        """Return *value* as a float, or *default* if it is None/unparsable."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _completion_status(completion: float) -> str:
        """Map a completion percentage to its chart status label."""
        if completion >= 100:
            return "Complete"
        if completion > 0:
            return "In Progress"
        return "Not Started"

    @classmethod
    def _requirement_rows(cls, parsed_data: Dict) -> List[Dict]:
        """Collect one chart row per graduation requirement.

        Requirements at 0% are kept (status "Not Started"); only entries
        with no completion figure at all are skipped.
        """
        rows = []
        if parsed_data.get('format') == 'progress_summary':
            for code, req in (parsed_data.get('requirements') or {}).items():
                if not req:
                    continue
                percent = req.get('percent_complete')
                # Test for "missing", not truthiness: 0 is a valid percentage.
                if percent in (None, ''):
                    continue
                completion = float(percent)
                required = req.get('required', 0)
                completed = req.get('completed', 0)
                rows.append({
                    "Requirement": f"{code}: {(req.get('description') or '')[:30]}...",
                    "Completion (%)": completion,
                    "Status": cls._completion_status(completion),
                    "Required": required,
                    "Completed": completed,
                    "Remaining": max(0, cls._to_float(required) - cls._to_float(completed))
                })
        else:
            credits = (parsed_data.get('academic_summary') or {}).get('credits') or {}
            for subject, info in credits.items():
                if not info:
                    continue
                required = info.get('required')
                earned = info.get('earned')
                if required is None or earned is None:
                    continue
                completion = (earned / required) * 100 if required > 0 else 0
                rows.append({
                    "Requirement": subject,
                    "Completion (%)": completion,
                    "Status": cls._completion_status(completion),
                    "Required": required,
                    "Completed": earned,
                    "Remaining": max(0, required - earned)
                })
        return rows

    @classmethod
    def _credit_totals(cls, parsed_data: Dict) -> Dict[str, float]:
        """Sum earned credits into the 'core', 'electives' and 'arts_pe' buckets."""
        totals = {'core': 0.0, 'electives': 0.0, 'arts_pe': 0.0}
        if parsed_data.get('format') == 'progress_summary':
            for key, req in (parsed_data.get('requirements') or {}).items():
                if not req:
                    continue
                # create_requirements_visualization treats the dict key as the
                # requirement code, so fall back to it when the entry carries
                # no 'code' field of its own.
                code = req.get('code') or key
                bucket = cls._CODE_PREFIX_BUCKETS.get(str(code).split('-')[0])
                if bucket:
                    totals[bucket] += cls._to_float(req.get('completed'))
        else:
            credits = (parsed_data.get('academic_summary') or {}).get('credits') or {}
            for subject, info in credits.items():
                words = str(subject).split()
                if not words or not info:
                    continue
                bucket = cls._SUBJECT_BUCKETS.get(words[0])
                if bucket:
                    totals[bucket] += cls._to_float(info.get('earned'))
        return totals

    def create_gpa_visualization(self, parsed_data: Dict):
        """Return a bar chart comparing weighted and unweighted GPA.

        For 'progress_summary' transcripts the values come from
        ``student_info``; otherwise the ``district`` GPA is plotted as
        weighted and the ``state`` GPA as unweighted.
        """
        try:
            if parsed_data.get('format') == 'progress_summary':
                info = parsed_data.get('student_info', {})
                weighted_gpa = float(info.get('weighted_gpa', 0))
                unweighted_gpa = float(info.get('unweighted_gpa', 0))
            else:
                gpa = parsed_data.get('academic_summary', {}).get('gpa', {})
                weighted_gpa = float(gpa.get('district', 0))
                unweighted_gpa = float(gpa.get('state', 0))

            df = pd.DataFrame({
                "Type": ["Weighted GPA", "Unweighted GPA"],
                "Value": [weighted_gpa, unweighted_gpa],
            })
            fig = px.bar(
                df,
                x="Type",
                y="Value",
                title="GPA Comparison",
                color="Type",
                color_discrete_map={
                    "Weighted GPA": self.color_palette['gpa_weighted'],
                    "Unweighted GPA": self.color_palette['gpa_unweighted']
                },
                text="Value",
                hover_data={"Type": True, "Value": ":.2f"}
            )
            # Reference lines for the GPA bands.
            for level, color, label in ((4.0, "green", "Excellent"),
                                        (3.0, "orange", "Good"),
                                        (2.0, "red", "Minimum")):
                fig.add_hline(y=level, line_dash="dot", line_color=color,
                              annotation_text=label, annotation_position="top left")
            fig.update_traces(
                texttemplate='%{text:.2f}',
                textposition='outside',
                marker_line_color='rgb(8,48,107)',
                marker_line_width=1.5
            )
            fig.update_layout(
                yaxis_range=[0, 5],
                uniformtext_minsize=8,
                uniformtext_mode='hide',
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(size=12)
            )
            return fig
        except Exception as e:
            logger.error(f"Error creating GPA visualization: {str(e)}")
            return None

    def create_requirements_visualization(self, parsed_data: Dict):
        """Return a bar chart of graduation-requirement completion, or None."""
        try:
            req_data = self._requirement_rows(parsed_data)
            if not req_data:
                return None
            df = pd.DataFrame(req_data)
            fig = px.bar(
                df,
                x="Requirement",
                y="Completion (%)",
                title="Graduation Requirements Completion",
                color="Status",
                color_discrete_map={
                    "Complete": self.color_palette['complete'],
                    "In Progress": self.color_palette['in_progress'],
                    "Not Started": self.color_palette['incomplete']
                },
                hover_data=["Required", "Completed", "Remaining"],
                text="Completion (%)"
            )
            fig.update_traces(
                texttemplate='%{text:.1f}%',
                textposition='outside',
                marker_line_color='rgb(8,48,107)',
                marker_line_width=1.5
            )
            fig.update_layout(
                xaxis={'categoryorder': 'total descending'},
                yaxis_range=[0, 100],
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(size=12),
                hovermode="x unified"
            )
            fig.add_hline(y=100, line_dash="dot", line_color="green")
            return fig
        except Exception as e:
            logger.error(f"Error creating requirements visualization: {str(e)}")
            return None

    def create_credits_distribution_visualization(self, parsed_data: Dict):
        """Return a donut chart of earned credits by category, or None.

        None is returned when no credits fall into any category.
        """
        try:
            totals = self._credit_totals(parsed_data)
            credit_values = [totals['core'], totals['electives'], totals['arts_pe']]
            if sum(credit_values) == 0:
                return None
            df = pd.DataFrame({
                "Category": ['Core Subjects', 'Electives', 'Arts/PE'],
                "Credits": credit_values,
            })
            fig = px.pie(
                df,
                values="Credits",
                names="Category",
                title="Credit Distribution",
                color="Category",
                color_discrete_map={
                    "Core Subjects": self.color_palette['core'],
                    "Electives": self.color_palette['electives'],
                    "Arts/PE": self.color_palette['arts_pe']
                },
                hole=0.3
            )
            fig.update_traces(
                textposition='inside',
                textinfo='percent+label',
                marker=dict(line=dict(color='#FFFFFF', width=2))
            )
            fig.update_layout(
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(size=12),
                showlegend=False
            )
            return fig
        except Exception as e:
            logger.error(f"Error creating credits visualization: {str(e)}")
            return None

    def create_course_rigor_visualization(self, parsed_data: Dict):
        """Return a bar chart counting AP, IB, DE and Honors courses.

        The counts come from ``academic_analyzer.analyze_course_rigor``.
        """
        try:
            rigor = academic_analyzer.analyze_course_rigor(parsed_data)
            df = pd.DataFrame({
                "Type": ["AP", "IB", "DE", "Honors"],
                "Count": [rigor['ap_courses'], rigor['ib_courses'],
                          rigor['de_courses'], rigor['honors_courses']],
            })
            fig = px.bar(
                df,
                x="Type",
                y="Count",
                title="Advanced Course Breakdown",
                color="Type",
                color_discrete_map=dict(self._RIGOR_COLORS),
                text="Count"
            )
            fig.update_traces(
                textposition='outside',
                marker_line_color='rgb(8,48,107)',
                marker_line_width=1.5
            )
            fig.update_layout(
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(size=12),
                xaxis_title="Course Type",
                yaxis_title="Number of Courses"
            )
            return fig
        except Exception as e:
            logger.error(f"Error creating course rigor visualization: {str(e)}")
            return None
# Module-level DataVisualizer instance; the interface code further down calls
# its create_*_visualization methods.
data_visualizer = DataVisualizer()
class EnhancedProfileManager:
    """Store, load, list and delete student profiles as JSON files.

    Profiles live in ``PROFILES_DIR``. The name, interests and blog fields
    are encrypted with ``DataEncryptor`` before being written. When
    ``HF_TOKEN`` and ``hf_api`` are set, saves and deletes are mirrored to a
    Hugging Face dataset repository on a best-effort basis.
    """

    # Dataset repository used for the optional Hub mirror.
    HF_REPO_ID = "your-username/student-learning-assistant"
    # Profile versions whose name/interests/blog fields are stored encrypted.
    _ENCRYPTED_VERSIONS = ('2.0', '2.1')

    def __init__(self):
        self.profiles_dir = Path(PROFILES_DIR)
        self.profiles_dir.mkdir(exist_ok=True, parents=True)
        self.current_session = None
        self.encryptor = DataEncryptor(ENCRYPTION_KEY)

    def set_session(self, session_token: str) -> None:
        """Remember the session token used to scope profile file names."""
        self.current_session = session_token

    def get_profile_path(self, name: str) -> Path:
        """Return the file path for *name* in the current session.

        The file name is built from the first 16 hex characters of the
        SHA-256 of the name, plus the session token when one is set.
        """
        name_hash = hashlib.sha256(name.encode()).hexdigest()[:16]
        if self.current_session:
            return self.profiles_dir / f"{name_hash}_{self.current_session}_profile.json"
        return self.profiles_dir / f"{name_hash}_profile.json"

    @staticmethod
    def _write_json_atomically(filepath: Path, data: Dict) -> None:
        """Write *data* to a temp file, then rename it over *filepath*."""
        temp_path = filepath.with_suffix('.tmp')
        try:
            with open(temp_path, "w", encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
            temp_path.replace(filepath)
        except Exception:
            # Do not leave a half-written temp file behind on failure.
            if temp_path.exists():
                temp_path.unlink()
            raise

    def _upload_to_hub(self, filepath: Path) -> None:
        """Mirror a saved profile to the Hub; failures are only logged."""
        if not (HF_TOKEN and hf_api):
            return
        try:
            hf_api.upload_file(
                path_or_fileobj=filepath,
                path_in_repo=f"profiles/{filepath.name}",
                repo_id=self.HF_REPO_ID,
                repo_type="dataset",
                # The name is encrypted inside the file, so the commit message
                # identifies the profile by its hashed file stem instead.
                commit_message=f"Profile update for {filepath.stem}"
            )
        except Exception as e:
            logger.error(f"Failed to upload to HF Hub: {str(e)}")

    def save_profile(self, name: str, age: Union[int, str], interests: str,
                     transcript: Dict, learning_style: str,
                     movie: str, movie_reason: str, show: str, show_reason: str,
                     book: str, book_reason: str, character: str, character_reason: str,
                     blog: str, study_plan: Optional[Dict] = None) -> str:
        """Validate, encrypt and persist a student profile.

        If *study_plan* is empty and the learning style can be extracted from
        *learning_style*, a plan is generated through ``academic_analyzer``.

        Returns:
            A confirmation message naming the student.

        Raises:
            gr.Error: If validation or writing the profile fails.
        """
        try:
            name = validate_name(name)
            age = validate_age(age)
            if not interests.strip():
                raise ValueError("Please describe at least one interest or hobby.")
            if not transcript:
                raise ValueError("Please complete the transcript analysis first.")
            if not learning_style or "Your primary learning style is" not in learning_style:
                raise ValueError("Please complete the learning style quiz first.")

            favorites = {
                "movie": sanitize_input(movie),
                "movie_reason": sanitize_input(movie_reason),
                "show": sanitize_input(show),
                "show_reason": sanitize_input(show_reason),
                "book": sanitize_input(book),
                "book_reason": sanitize_input(book_reason),
                "character": sanitize_input(character),
                "character_reason": sanitize_input(character_reason)
            }

            if not study_plan:
                style_match = re.search(r"Your primary learning style is\s*\*\*(.*?)\*\*", learning_style)
                if style_match:
                    study_plan = academic_analyzer.generate_study_plan(
                        transcript,
                        style_match.group(1))

            data = {
                "name": self.encryptor.encrypt(name),
                "age": age,
                "interests": self.encryptor.encrypt(sanitize_input(interests)),
                "transcript": transcript,
                "learning_style": learning_style,
                "favorites": favorites,
                "blog": self.encryptor.encrypt(sanitize_input(blog)) if blog else "",
                "study_plan": study_plan or {},
                "session_token": self.current_session,
                "last_updated": time.time(),
                "version": "2.1"
            }

            filepath = self.get_profile_path(name)
            self._write_json_atomically(filepath, data)
            self._upload_to_hub(filepath)
            return f"Profile saved successfully for {name}."
        except Exception as e:
            logger.error(f"Profile save error: {str(e)}")
            raise gr.Error(f"Couldn't save profile: {str(e)}") from e

    def _locate_named_profile(self, name: str) -> Path:
        """Return the local file for *name*, fetching it from the Hub if needed.

        Raises:
            gr.Error: If the profile exists neither locally nor remotely.
        """
        profile_file = self.get_profile_path(name)
        if profile_file.exists():
            return profile_file
        if HF_TOKEN and hf_api:
            try:
                # NOTE(review): assumes hf_api is a huggingface_hub.HfApi, which
                # exposes hf_hub_download (it has no download_file method).
                downloaded = hf_api.hf_hub_download(
                    repo_id=self.HF_REPO_ID,
                    filename=f"profiles/{profile_file.name}",
                    repo_type="dataset",
                    local_dir=self.profiles_dir
                )
                return Path(downloaded)
            except Exception as e:
                logger.warning(f"Failed to download profile: {str(e)}")
        raise gr.Error(f"No profile found for {name}")

    def _decrypt_profile_fields(self, profile_data: Dict) -> None:
        """Decrypt name, interests and blog in place for encrypted versions."""
        if profile_data.get('version', '1.0') not in self._ENCRYPTED_VERSIONS:
            return
        try:
            profile_data['name'] = self.encryptor.decrypt(profile_data['name'])
            profile_data['interests'] = self.encryptor.decrypt(profile_data.get('interests', ''))
            if profile_data.get('blog'):
                profile_data['blog'] = self.encryptor.decrypt(profile_data['blog'])
        except Exception as e:
            logger.error(f"Decryption error: {str(e)}")
            raise gr.Error("Failed to decrypt profile data") from e

    def load_profile(self, name: Optional[str] = None,
                     session_token: Optional[str] = None) -> Dict:
        """Load a profile by name, or the most recently modified one.

        Reading is retried up to ``MAX_PROFILE_LOAD_ATTEMPTS`` times with an
        increasing delay when the file cannot be read or parsed.

        Returns:
            The profile dict with decrypted fields, or ``{}`` when no profile
            file matches the session.

        Raises:
            gr.Error: If the named profile is missing, the profile is older
                than ``SESSION_TIMEOUT``, decryption fails, or the file is
                still unparsable on the last attempt.
        """
        pattern = f"*{session_token}_profile.json" if session_token else "*.json"
        for attempt in range(MAX_PROFILE_LOAD_ATTEMPTS):
            is_last_attempt = attempt == MAX_PROFILE_LOAD_ATTEMPTS - 1
            try:
                profiles = list(self.profiles_dir.glob(pattern))
                if not profiles:
                    return {}
                if name:
                    profile_file = self._locate_named_profile(name)
                else:
                    profile_file = max(profiles, key=lambda p: p.stat().st_mtime)

                with open(profile_file, "r", encoding='utf-8') as f:
                    profile_data = json.load(f)

                if time.time() - profile_data.get('last_updated', 0) > SESSION_TIMEOUT:
                    raise gr.Error("Session expired. Please start a new session.")

                self._decrypt_profile_fields(profile_data)
                return profile_data
            except gr.Error:
                # Missing/expired/undecryptable profiles will not change on a
                # retry, so surface them immediately instead of sleeping.
                raise
            except json.JSONDecodeError as e:
                if is_last_attempt:
                    logger.error(f"Failed to load profile after {MAX_PROFILE_LOAD_ATTEMPTS} attempts")
                    raise gr.Error("Corrupted profile data") from e
            except Exception:
                if is_last_attempt:
                    raise
            time.sleep(0.5 * (attempt + 1))

    def list_profiles(self, session_token: Optional[str] = None) -> List[str]:
        """Return the student names of all stored profiles.

        Files that cannot be read are skipped. When an encrypted name cannot
        be decrypted, the file stem is listed instead.
        """
        pattern = f"*{session_token}_profile.json" if session_token else "*.json"
        profile_names = []
        for path in self.profiles_dir.glob(pattern):
            try:
                with open(path, "r", encoding='utf-8') as f:
                    data = json.load(f)
                if data.get('version', '1.0') in self._ENCRYPTED_VERSIONS:
                    try:
                        profile_names.append(self.encryptor.decrypt(data['name']))
                    except Exception:
                        profile_names.append(path.stem)
                else:
                    profile_names.append(data.get('name', path.stem))
            except Exception as e:
                logger.warning(f"Skipping unreadable profile {path.name}: {str(e)}")
                continue
        return profile_names

    def delete_profile(self, name: str, session_token: Optional[str] = None) -> bool:
        """Delete the profile for *name*.

        Returns:
            True if the local file was removed; False if it does not exist,
            belongs to a different session token, or an error occurred.
        """
        try:
            profile_file = self.get_profile_path(name)
            if not profile_file.exists():
                return False
            with open(profile_file, "r", encoding='utf-8') as f:
                data = json.load(f)
            if session_token and data.get('session_token') != session_token:
                return False
            profile_file.unlink()
            if HF_TOKEN and hf_api:
                try:
                    hf_api.delete_file(
                        path_in_repo=f"profiles/{profile_file.name}",
                        repo_id=self.HF_REPO_ID,
                        repo_type="dataset"
                    )
                except Exception as e:
                    logger.error(f"Failed to delete from HF Hub: {str(e)}")
            return True
        except Exception as e:
            logger.error(f"Error deleting profile: {str(e)}")
            return False
# Module-level profile manager. Constructing it creates PROFILES_DIR if it
# does not exist yet (see EnhancedProfileManager.__init__).
profile_manager = EnhancedProfileManager()
class EducationalChatbot:
    """Answer educational questions with a causal language model.

    Questions that match none of the configured subjects are refused.
    Responses are post-processed towards guiding language and paired with
    resource suggestions chosen by the student's learning style.
    """

    _STYLE_PATTERN = r"Your primary learning style is\s*\*\*(.*?)\*\*"

    _GUIDING_PHRASES = (
        "What do you think about...",
        "Have you considered...",
        "Let's break this down...",
        "One approach might be...",
        "Think about how you would...",
        "What steps would you take to...",
    )

    # Learning style -> educational platforms suggested for it.
    _RESOURCES_BY_STYLE = {
        'visual': [
            {"type": "video", "source": "Khan Academy", "url": "https://www.khanacademy.org"},
            {"type": "diagram", "source": "Math is Fun", "url": "https://www.mathsisfun.com"},
            {"type": "infographic", "source": "InfoGram", "url": "https://infogram.com"}
        ],
        'auditory': [
            {"type": "podcast", "source": "Stuff You Should Know", "url": "https://www.iheart.com/podcast/stuff-you-should-know-26940277"},
            {"type": "audio_lecture", "source": "The Great Courses", "url": "https://www.thegreatcourses.com"}
        ],
        'reading/writing': [
            {"type": "article", "source": "Britannica", "url": "https://www.britannica.com"},
            {"type": "textbook", "source": "OpenStax", "url": "https://openstax.org"}
        ],
        'kinesthetic': [
            {"type": "interactive", "source": "PhET Simulations", "url": "https://phet.colorado.edu"},
            {"type": "hands-on", "source": "Science Buddies", "url": "https://www.sciencebuddies.org"}
        ]
    }

    def __init__(self):
        self.model_name = "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
        self.tokenizer = None
        self.model = None
        self.educational_topics = {
            'math': ['algebra', 'calculus', 'geometry', 'trigonometry'],
            'science': ['biology', 'chemistry', 'physics', 'astronomy'],
            'humanities': ['history', 'literature', 'philosophy'],
            'languages': ['english', 'spanish', 'french', 'grammar'],
            'arts': ['music', 'art', 'drama'],
            'technology': ['programming', 'computer science']
        }
        # The model is loaded eagerly, as part of construction.
        self.load_model()

    def load_model(self):
        """Load the HuggingFace tokenizer and model named by ``model_name``."""
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            torch_dtype=torch.float16
        )
        logger.info("Educational chatbot model loaded")

    def is_educational(self, question: str) -> bool:
        """Return True if the question mentions a known subject or topic.

        Matching is a case-insensitive substring test against both the
        category names (e.g. "math") and their topics (e.g. "algebra").
        """
        question_lower = question.lower()
        return any(
            category in question_lower
            or any(topic in question_lower for topic in topics)
            for category, topics in self.educational_topics.items()
        )

    def generate_response(self, question: str, profile: Dict) -> Tuple[str, List[Dict]]:
        """Generate a personalized educational response.

        Returns:
            ``(response_text, multimedia_suggestions)``. Non-educational
            questions get a fixed refusal message and no suggestions.
        """
        if not self.is_educational(question):
            return (
                "I specialize in educational topics only. Please ask about subjects like math, "
                "science, history, or literature. I can help with concepts, problem-solving methods, "
                "and learning strategies.",
                []
            )

        learning_style = self._get_learning_style(profile)

        prompt = self._build_prompt(question, profile)
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=300,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2,
            do_sample=True
        )
        # generate() returns the prompt tokens followed by the new tokens for
        # a causal LM; decode only the continuation so the reply does not echo
        # the instructions back to the student.
        prompt_length = inputs["input_ids"].shape[-1]
        raw_response = self.tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True)

        processed_response = self._make_response_pedagogical(raw_response)
        multimedia = self._get_multimedia_suggestions(processed_response, learning_style)
        return processed_response, multimedia

    def _get_learning_style(self, profile: Dict) -> str:
        """Extract the lower-cased learning style, or ``'balanced'`` if unknown."""
        style_text = profile.get('learning_style') if profile else None
        if not isinstance(style_text, str):
            return 'balanced'
        style_match = re.search(self._STYLE_PATTERN, style_text)
        return style_match.group(1).strip().lower() if style_match else 'balanced'

    def _build_prompt(self, question: str, profile: Dict) -> str:
        """Build the model prompt from the question and the student's profile."""
        base_prompt = (
            "You are an expert teaching assistant helping a student. Your role is to guide them "
            "to discover answers themselves, not provide direct solutions. Use the Socratic method "
            "by asking guiding questions and explaining concepts step-by-step.\n\n"
        )
        if profile:
            transcript = profile.get('transcript')
            if isinstance(transcript, dict):
                courses = []
                for course in transcript.get('course_history') or []:
                    if not isinstance(course, dict):
                        continue
                    # generate_study_plan also accepts 'description' as the
                    # course name, so fall back to it here.
                    title = course.get('course_title') or course.get('description')
                    if title:
                        courses.append(str(title))
                # Only mention the academic background when there is one.
                if courses:
                    base_prompt += (
                        f"The student has taken these courses: {', '.join(courses[:5])}. "
                        "Consider their academic background when responding.\n\n"
                    )

            learning_style = self._get_learning_style(profile)
            if learning_style != 'balanced':
                base_prompt += (
                    f"The student is a {learning_style} learner. Adapt your teaching approach accordingly.\n\n"
                )

        base_prompt += (
            f"Student Question: {question}\n\n"
            "Teaching Assistant Response:\n"
            "1. First, let's understand the key concepts involved...\n"
            "2. What do you think would be the first step in solving this?\n"
            "3. Consider this approach...\n"
            "4. Here's how we might break this down...\n"
            "Remember, the goal is understanding, not just the answer."
        )
        return base_prompt

    def _make_response_pedagogical(self, response: str) -> str:
        """Strip direct-answer sentences and ensure guiding phrases are present."""
        # Drop the rest of any line that starts giving the answer away.
        response = re.sub(r"(the answer is|it is|direct solution:) .*?(\n|$)", "", response, flags=re.I)

        response_lower = response.lower()
        present = sum(1 for phrase in self._GUIDING_PHRASES if phrase.lower() in response_lower)
        # Append the first two guiding phrases when fewer than two are present.
        if present < 2:
            response += "\n\n" + "\n".join(self._GUIDING_PHRASES[:2])
        return response

    def _get_multimedia_suggestions(self, response: str, learning_style: str) -> List[Dict]:
        """Suggest resources based on the learning style and response content."""
        # Copy the entries so callers cannot mutate the shared class table.
        suggestions = [
            dict(resource)
            for resource in self._RESOURCES_BY_STYLE.get(learning_style, [])[:2]
        ]

        response_lower = response.lower()
        if "math" in response_lower:
            suggestions.append({
                "type": "practice_problems",
                "source": "Art of Problem Solving",
                "url": "https://artofproblemsolving.com"
            })
        elif "science" in response_lower:
            suggestions.append({
                "type": "experiment",
                "source": "Science Journal",
                "url": "https://sciencejournal.withgoogle.com"
            })
        return suggestions
# Module-level chatbot instance. EducationalChatbot.__init__ calls
# load_model(), so the tokenizer and model are loaded when this line runs.
educational_chatbot = EducationalChatbot()
class StudyCalendar:
    """Generate and plot a 30-day study calendar."""

    _DATE_FORMAT = '%Y-%m-%d'
    # The calendar covers start_date through start_date + 30 days, inclusive.
    _HORIZON_DAYS = 30

    def __init__(self):
        # Weekday name -> list of (start, end) study blocks as 'HH:MM' strings.
        self.default_study_blocks = {
            'Monday': [('16:00', '17:30'), ('19:00', '20:30')],
            'Tuesday': [('16:00', '17:30')],
            'Wednesday': [('16:00', '17:30'), ('19:00', '20:30')],
            'Thursday': [('16:00', '17:30')],
            'Friday': [('15:00', '16:30')],
            'Saturday': [('10:00', '12:00')],
            'Sunday': [('14:00', '16:00')]
        }

    def generate_study_calendar(self, profile: Dict, start_date: str) -> Dict:
        """Generate a study calendar based on the student's profile.

        Args:
            profile: Student profile; only ``transcript.course_history`` is
                read. ``None`` or a profile without a transcript is accepted.
            start_date: First day of the calendar as ``YYYY-MM-DD``.

        Returns:
            Dict with ``start_date``, ``end_date``, ``events`` (one entry per
            default study block per day) and ``exams`` (one entry per course
            whose title contains "exam", placed on a random day 7-28 days
            after the start). On failure the error is logged and both lists
            are empty; if *start_date* cannot be parsed, ``end_date`` echoes
            ``start_date``.
        """
        try:
            first_day = datetime.datetime.strptime(start_date, self._DATE_FORMAT)
        except (TypeError, ValueError) as e:
            # The fallback must not re-parse the date that just failed.
            logger.error(f"Error generating study calendar: {str(e)}")
            return {
                'start_date': start_date,
                'end_date': start_date,
                'events': [],
                'exams': []
            }

        end_date = (first_day + datetime.timedelta(days=self._HORIZON_DAYS)).strftime(self._DATE_FORMAT)
        try:
            events = []
            for offset in range(self._HORIZON_DAYS + 1):
                day = first_day + datetime.timedelta(days=offset)
                day_label = day.strftime(self._DATE_FORMAT)
                for start_time, end_time in self.default_study_blocks.get(day.strftime('%A'), []):
                    events.append({
                        'date': day_label,
                        'title': 'Study Session',
                        'description': 'Focused study time',
                        'start_time': start_time,
                        'end_time': end_time,
                        'duration': f"{start_time} to {end_time}"
                    })

            exams = []
            transcript = (profile or {}).get('transcript') or {}
            for course in transcript.get('course_history') or []:
                title = course.get('course_title') or ''
                if 'exam' not in title.lower():
                    continue
                exam_day = first_day + datetime.timedelta(days=random.randint(7, 28))
                exams.append({
                    'date': exam_day.strftime(self._DATE_FORMAT),
                    'title': title,
                    'description': 'Prepare by reviewing materials',
                    'duration': 'All day'
                })

            return {
                'start_date': start_date,
                'end_date': end_date,
                'events': events,
                'exams': exams
            }
        except Exception as e:
            logger.error(f"Error generating study calendar: {str(e)}")
            return {
                'start_date': start_date,
                'end_date': end_date,
                'events': [],
                'exams': []
            }

    def create_calendar_visualization(self, calendar: Dict):
        """Create a timeline figure of the study calendar.

        Study sessions are drawn in green and exams as all-day red bars.
        Returns ``None`` when the calendar has neither events nor exams, or
        when building the figure fails (the error is logged).
        """
        try:
            if not calendar.get('events') and not calendar.get('exams'):
                return None
            events_df = pd.DataFrame(calendar.get('events') or [])
            exams_df = pd.DataFrame(calendar.get('exams') or [])

            exam_fig = None
            if not exams_df.empty:
                # Exams carry no times of their own; draw them as all-day bars.
                exams_df = exams_df.assign(start_time='00:00', end_time='23:59')
                exam_fig = px.timeline(
                    exams_df,
                    x_start="start_time",
                    x_end="end_time",
                    y="date",
                    color_discrete_sequence=['#F44336']
                )

            if events_df.empty:
                # Exams-only calendar: the exam figure is the whole chart.
                fig = exam_fig
                fig.update_layout(title="Study Schedule")
            else:
                fig = px.timeline(
                    events_df,
                    x_start="start_time",
                    x_end="end_time",
                    y="date",
                    color_discrete_sequence=['#4CAF50'],
                    title="Study Schedule"
                )
                if exam_fig is not None:
                    fig.add_trace(exam_fig.data[0])

            fig.update_layout(
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(size=12),
                showlegend=False
            )
            return fig
        except Exception as e:
            logger.error(f"Error creating calendar visualization: {str(e)}")
            return None
# Module-level StudyCalendar instance holding the default weekly study blocks.
study_calendar = StudyCalendar()
class GoalTracker:
    """Persist student goals and their progress in ``student_goals.json``.

    The file maps each student name to a list of goal dicts; every goal
    carries a ``progress`` list of dated entries.
    """

    def __init__(self):
        self.goals_file = Path("student_goals.json")
        # Make sure the file exists; an empty file loads as no goals.
        self.goals_file.touch(exist_ok=True)

    def add_goal(self, student_name: str, goal_type: str, description: str,
                 target_date: str, target_value: Optional[float] = None) -> bool:
        """Add a new goal for the student.

        Returns:
            True when the goal was stored; False when *target_date* is
            rejected by ``validate_date`` or saving fails (logged).
        """
        try:
            if not validate_date(target_date):
                raise ValueError("Invalid target date format. Please use YYYY-MM-DD")
            goals = self._load_goals()
            student_goals = goals.setdefault(student_name, [])
            new_goal = {
                'id': str(len(student_goals) + 1),
                'type': goal_type,
                'description': description,
                'target_date': target_date,
                'target_value': target_value,
                'created_at': datetime.datetime.now().isoformat(),
                'progress': []
            }
            student_goals.append(new_goal)
            self._save_goals(goals)
            return True
        except Exception as e:
            logger.error(f"Error adding goal: {str(e)}")
            return False

    def update_goal_progress(self, student_name: str, goal_id: str,
                             progress_value: float, notes: str = "") -> bool:
        """Append a progress entry to one of the student's goals.

        Returns:
            True when the entry was recorded; False when the student or the
            goal id is unknown, or saving fails (logged).
        """
        try:
            goals = self._load_goals()
            # Ids are stored as strings, so accept an int id as well.
            wanted_id = str(goal_id)
            goal = next(
                (g for g in goals.get(student_name, []) if g.get('id') == wanted_id),
                None,
            )
            if goal is None:
                # Nothing matched: do not report success for a no-op.
                return False
            goal.setdefault('progress', []).append({
                'date': datetime.datetime.now().isoformat(),
                'value': progress_value,
                'notes': notes
            })
            self._save_goals(goals)
            return True
        except Exception as e:
            logger.error(f"Error updating goal progress: {str(e)}")
            return False

    def get_goals(self, student_name: str) -> List[Dict]:
        """Get all goals for a student (empty list if none or on error)."""
        try:
            return self._load_goals().get(student_name, [])
        except Exception as e:
            logger.error(f"Error getting goals: {str(e)}")
            return []

    @staticmethod
    def _progress_rows(goals: List[Dict]) -> List[Dict]:
        """Build one chart row per goal that has at least one progress entry."""
        rows = []
        for goal in goals:
            if not goal.get('progress'):
                continue
            target = goal.get('target_value')
            rows.append({
                'Goal': goal['description'],
                'Progress': goal['progress'][-1]['value'],
                # add_goal always stores the key, possibly as None, so a
                # dict.get default would never apply; test for None instead.
                'Target': 100 if target is None else target,
                'Type': goal['type']
            })
        return rows

    def create_goal_visualization(self, goals: List[Dict]):
        """Create a grouped bar chart of latest progress versus target.

        Returns ``None`` when no goal has progress recorded or when building
        the figure fails (the error is logged).
        """
        try:
            if not goals:
                return None
            progress_data = self._progress_rows(goals)
            if not progress_data:
                return None
            df = pd.DataFrame(progress_data)
            fig = px.bar(
                df,
                x='Goal',
                y=['Progress', 'Target'],
                barmode='group',
                title="Goal Progress",
                color_discrete_map={
                    'Progress': '#4CAF50',
                    'Target': '#2196F3'
                }
            )
            fig.update_layout(
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(size=12)
            )
            return fig
        except Exception as e:
            logger.error(f"Error creating goal visualization: {str(e)}")
            return None

    def _save_goals(self, goals: Dict) -> None:
        """Write all goals back to the goals file."""
        # Serialize first: a value json cannot encode then raises before the
        # file is opened for writing, instead of leaving it truncated.
        payload = json.dumps(goals, indent=2)
        with open(self.goals_file, 'w') as f:
            f.write(payload)

    def _load_goals(self) -> Dict:
        """Load all goals from the file; missing or invalid content gives ``{}``."""
        try:
            with open(self.goals_file, 'r') as f:
                loaded = json.load(f)
        except (json.JSONDecodeError, FileNotFoundError):
            return {}
        return loaded if isinstance(loaded, dict) else {}
# Module-level goal tracker. Constructing it touches student_goals.json in the
# current working directory (see GoalTracker.__init__).
goal_tracker = GoalTracker()
def create_enhanced_interface():
with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
session_token = gr.State(value=generate_session_token())
profile_manager.set_session(session_token.value)
tab_completed = gr.State({
0: False,
1: False,
2: False,
3: False,
4: False,
5: False
})
app.css = """
.gradio-container {
max-width: 1200px !important;
margin: 0 auto !important;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.tab-content {
padding: 20px !important;
border: 1px solid #e0e0e0 !important;
border-radius: 8px !important;
margin-top: 10px !important;
background-color: white;
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
}
.completed-tab {
background: #4CAF50 !important;
color: white !important;
font-weight: bold;
}
.incomplete-tab {
background: #E0E0E0 !important;
color: #616161;
}
.nav-message {
padding: 12px;
margin: 10px 0;
border-radius: 6px;
background-color: #ffebee;
color: #c62828;
border-left: 4px solid #c62828;
}
.file-upload {
border: 2px dashed #4CAF50 !important;
padding: 25px !important;
border-radius: 8px !important;
text-align: center;
background-color: #f8f8f8;
}
.file-upload:hover {
background: #f1f8e9;
}
.progress-bar {
height: 6px;
background: linear-gradient(to right, #4CAF50, #8BC34A);
margin-bottom: 15px;
border-radius: 3px;
box-shadow: inset 0 1px 2px rgba(0,0,0,0.1);
}
.quiz-question {
margin-bottom: 15px;
padding: 15px;
background: #f5f5f5;
border-radius: 5px;
border-left: 4px solid #2196F3;
}
.quiz-results {
margin-top: 20px;
padding: 20px;
background: #e8f5e9;
border-radius: 8px;
border-left: 4px solid #4CAF50;
}
.error-message {
color: #d32f2f;
background-color: #ffebee;
padding: 12px;
border-radius: 6px;
margin: 10px 0;
border-left: 4px solid #d32f2f;
}
.transcript-results {
border-left: 4px solid #4CAF50 !important;
padding: 15px !important;
background: #f8f8f8 !important;
border-radius: 4px;
}
.error-box {
border: 1px solid #ff4444 !important;
background: #fff8f8 !important;
border-radius: 4px;
}
.metric-box {
background-color: white;
border-radius: 10px;
padding: 15px;
margin: 10px 0;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
border-left: 4px solid #2196F3;
}
.recommendation {
background-color: #fff8e1;
padding: 10px;
border-left: 4px solid #ffc107;
margin: 5px 0;
border-radius: 4px;
}
.goal-card {
background-color: white;
border-radius: 8px;
padding: 15px;
margin: 10px 0;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
border-left: 4px solid #4CAF50;
}
.calendar-event {
background-color: #e3f2fd;
border-radius: 6px;
padding: 10px;
margin: 5px 0;
border-left: 4px solid #2196F3;
}
.dark .tab-content {
background-color: #2d2d2d !important;
border-color: #444 !important;
color: #eee !important;
}
.dark .quiz-question {
background-color: #3d3d3d !important;
color: #eee !important;
}
.dark .quiz-results {
background-color: #2e3d2e !important;
color: #eee !important;
}
.dark textarea, .dark input {
background-color: #333 !important;
color: #eee !important;
border-color: #555 !important;
}
.dark .output-markdown {
color: #eee !important;
}
.dark .chatbot {
background-color: #333 !important;
}
.dark .chatbot .user, .dark .chatbot .assistant {
color: #eee !important;
}
.dark .metric-box {
background-color: #333 !important;
color: #eee !important;
}
.dark .goal-card {
background-color: #333;
color: #eee;
}
.dark .calendar-event {
background-color: #1a3d5c;
color: #eee;
}
"""
with gr.Row():
with gr.Column(scale=4):
gr.Markdown("""
# πŸ“š Student Learning Assistant
**Your personalized education companion**
Complete each step to get customized learning recommendations and academic planning.
""")
with gr.Column(scale=1):
dark_mode = gr.Checkbox(label="Dark Mode", value=False)
with gr.Row():
with gr.Column(scale=1, min_width=100):
step1 = gr.Button("πŸ“„ 1. Transcript", elem_classes="incomplete-tab")
with gr.Column(scale=1, min_width=100):
step2 = gr.Button("πŸ“ 2. Quiz", elem_classes="incomplete-tab", interactive=False)
with gr.Column(scale=1, min_width=100):
step3 = gr.Button("πŸ‘€ 3. Profile", elem_classes="incomplete-tab", interactive=False)
with gr.Column(scale=1, min_width=100):
step4 = gr.Button("πŸ” 4. Review", elem_classes="incomplete-tab", interactive=False)
with gr.Column(scale=1, min_width=100):
step5 = gr.Button("πŸ’¬ 5. Assistant", elem_classes="incomplete-tab", interactive=False)
with gr.Column(scale=1, min_width=100):
step6 = gr.Button("🎯 6. Goals", elem_classes="incomplete-tab", interactive=False)
nav_message = gr.HTML(visible=False)
with gr.Tabs(visible=True) as tabs:
with gr.Tab("Transcript", id=0):
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("### πŸ“„ Step 1: Upload Your Transcript")
with gr.Group(elem_classes="file-upload"):
file_input = gr.File(
label="Drag and drop your transcript here (PDF or Image)",
file_types=ALLOWED_FILE_TYPES,
type="filepath"
)
upload_btn = gr.Button("Analyze Transcript", variant="primary")
file_error = gr.HTML(visible=False)
with gr.Column(scale=2):
transcript_output = gr.Textbox(
label="Analysis Results",
lines=10,
interactive=False,
elem_classes="transcript-results"
)
with gr.Row():
gpa_viz = gr.Plot(label="GPA Visualization", visible=False)
req_viz = gr.Plot(label="Requirements Visualization", visible=False)
with gr.Row():
credits_viz = gr.Plot(label="Credits Distribution", visible=False)
rigor_viz = gr.Plot(label="Course Rigor", visible=False)
transcript_data = gr.State()
file_input.change(
fn=lambda f: (
gr.update(visible=False),
gr.update(value="File ready for analysis!", visible=True) if f
else gr.update(value="Please upload a file", visible=False)
),
inputs=file_input,
outputs=[file_error, transcript_output]
)
def process_and_visualize(file_obj, tab_status):
    """Parse an uploaded transcript and build the report and chart updates.

    Args:
        file_obj: The uploaded transcript as delivered by ``file_input``.
            Either a path string or an object exposing the path as ``.name``.
        tab_status: Mapping of tab index to completion flag held in the
            ``tab_completed`` state.

    Returns:
        A 7-tuple matching the ``upload_btn.click`` outputs: the Markdown
        report, the parsed transcript data, four plot updates (GPA,
        requirements, credits, rigor) and the updated completion mapping.

    Raises:
        gr.Error: If no file was supplied or the transcript could not be
            processed.
    """
    if not file_obj:
        # Fail early with a readable message instead of an AttributeError.
        raise gr.Error("Please upload a transcript file before running the analysis.")

    # Accept both plain path strings and file wrappers carrying ``.name``.
    file_path = getattr(file_obj, "name", file_obj)

    try:
        parsed_data = transcript_parser.parse_transcript(file_path)
        gpa_analysis = academic_analyzer.analyze_gpa(parsed_data)
        grad_status = academic_analyzer.analyze_graduation_status(parsed_data)
        college_recs = academic_analyzer.generate_college_recommendations(parsed_data)

        results = [
            "## πŸ“Š GPA Analysis",
            f"**Rating:** {gpa_analysis['rating']}",
            f"{gpa_analysis['description']}",
            f"{gpa_analysis['comparison']}",
            "",
            "## πŸŽ“ Graduation Status",
            grad_status['status'],
            f"**Completion:** {grad_status['completion_percentage']:.1f}%",
            "",
            "## 🏫 College Recommendations"
        ]
        # Only the first three schools of each tier are listed.
        for heading, tier in (("\n**Reach Schools:**", 'reach'), ("\n**Target Schools:**", 'target')):
            if college_recs[tier]:
                results.append(heading)
                results.extend(f"- {school}" for school in college_recs[tier][:3])
        if gpa_analysis.get('improvement_tips'):
            results.append("\n**Improvement Tips:**")
            results.extend(f"- {tip}" for tip in gpa_analysis['improvement_tips'])

        # Build each figure once and hand it to its plot; toggling visibility
        # alone would reveal an empty plot.
        figures = (
            data_visualizer.create_gpa_visualization(parsed_data),
            data_visualizer.create_requirements_visualization(parsed_data),
            data_visualizer.create_credits_distribution_visualization(parsed_data),
            data_visualizer.create_course_rigor_visualization(parsed_data),
        )
        viz_updates = [gr.update(value=fig, visible=fig is not None) for fig in figures]

        # Return a fresh mapping rather than mutating the incoming state.
        updated_status = {**(tab_status or {}), 0: True}
        return "\n".join(results), parsed_data, *viz_updates, updated_status
    except Exception as e:
        error_msg = f"Error processing transcript: {str(e)}"
        logger.exception(error_msg)
        raise gr.Error(f"{error_msg}\n\nPossible solutions:\n1. Try a different file format\n2. Ensure text is clear and not handwritten\n3. Check file size (<{MAX_FILE_SIZE_MB}MB)") from e


# ``.success`` (not ``.then``) so a failed analysis does not mark step 1 as
# completed or unlock step 2.
upload_btn.click(
    fn=process_and_visualize,
    inputs=[file_input, tab_completed],
    outputs=[transcript_output, transcript_data, gpa_viz, req_viz, credits_viz, rigor_viz, tab_completed]
).success(
    fn=lambda: gr.update(elem_classes="completed-tab"),
    outputs=step1
).success(
    fn=lambda: gr.update(interactive=True),
    outputs=step2
)
with gr.Tab("Learning Style Quiz", id=1):
with gr.Column():
gr.Markdown("### πŸ“ Step 2: Discover Your Learning Style")
progress = gr.HTML("<div class='progress-bar' style='width: 0%'></div>")
quiz_components = []
with gr.Accordion("Quiz Questions", open=True):
for i, (question, options) in enumerate(zip(learning_style_quiz.questions, learning_style_quiz.options)):
with gr.Group(elem_classes="quiz-question"):
q = gr.Radio(
options,
label=f"{i+1}. {question}",
show_label=True
)
quiz_components.append(q)
with gr.Row():
quiz_submit = gr.Button("Submit Quiz", variant="primary")
quiz_clear = gr.Button("Clear Answers")
quiz_alert = gr.HTML(visible=False)
learning_output = gr.Markdown(
label="Your Learning Style Results",
visible=False,
elem_classes="quiz-results"
)
for component in quiz_components:
component.change(
fn=lambda *answers: {
progress: gr.HTML(
f"<div class='progress-bar' style='width: {sum(1 for a in answers if a)/len(answers)*100}%'></div>"
)
},
inputs=quiz_components,
outputs=progress
)
# Evaluate the quiz, reveal the result, record completion and unlock step 3.
# ``.success`` keeps the follow-up steps from running if the evaluation raises.
quiz_submit.click(
    fn=lambda *answers: learning_style_quiz.evaluate_quiz(*answers),
    inputs=quiz_components,
    outputs=learning_output
).success(
    fn=lambda: gr.update(visible=True),
    outputs=learning_output
).success(
    # Merge into the existing completion map; returning a bare ``{1: True}``
    # would replace the state and erase the completion flag of step 0.
    fn=lambda status: {**(status or {}), 1: True},
    inputs=tab_completed,
    outputs=tab_completed
).success(
    fn=lambda: gr.update(elem_classes="completed-tab"),
    outputs=step2
).success(
    fn=lambda: gr.update(interactive=True),
    outputs=step3
)
quiz_clear.click(
fn=lambda: [None] * len(quiz_components),
outputs=quiz_components
).then(
fn=lambda: gr.HTML("<div class='progress-bar' style='width: 0%'></div>"),
outputs=progress
)
with gr.Tab("Personal Profile", id=2):
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("### πŸ‘€ Step 3: Tell Us About Yourself")
with gr.Group():
name = gr.Textbox(label="Full Name", placeholder="Your name")
age = gr.Number(label="Age", minimum=MIN_AGE, maximum=MAX_AGE, precision=0)
interests = gr.Textbox(
label="Your Interests/Hobbies",
placeholder="e.g., Science, Music, Sports, Art..."
)
save_personal_btn = gr.Button("Save Information", variant="primary")
save_confirmation = gr.HTML(visible=False)
with gr.Column(scale=1):
gr.Markdown("### ❀️ Favorites")
with gr.Group():
movie = gr.Textbox(label="Favorite Movie")
movie_reason = gr.Textbox(label="Why do you like it?", lines=2)
show = gr.Textbox(label="Favorite TV Show")
show_reason = gr.Textbox(label="Why do you like it?", lines=2)
book = gr.Textbox(label="Favorite Book")
book_reason = gr.Textbox(label="Why do you like it?", lines=2)
character = gr.Textbox(label="Favorite Character (from any story)")
character_reason = gr.Textbox(label="Why do you like them?", lines=2)
with gr.Accordion("Personal Blog (Optional)", open=False):
blog = gr.Textbox(
label="Share your thoughts",
placeholder="Write something about yourself...",
lines=5
)
# Mark step 3 as completed, unlock step 4 and show a confirmation banner.
save_personal_btn.click(
    fn=lambda n, a, i, ts: (
        # Extend the current completion map instead of replacing it, so the
        # flags of the earlier steps are preserved.
        {**(ts or {}), 2: True},
        gr.update(elem_classes="completed-tab"),
        gr.update(interactive=True),
        gr.update(value="<div class='alert-box'>Information saved!</div>", visible=True)
    ),
    inputs=[name, age, interests, tab_completed],
    outputs=[tab_completed, step3, step4, save_confirmation]
)
with gr.Tab("Save Profile", id=3):
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("### πŸ” Step 4: Review & Save Your Profile")
with gr.Group():
load_profile_dropdown = gr.Dropdown(
label="Load Existing Profile",
choices=profile_manager.list_profiles(session_token.value),
visible=False
)
with gr.Row():
load_btn = gr.Button("Load", visible=False)
delete_btn = gr.Button("Delete", variant="stop", visible=False)
save_btn = gr.Button("Save Profile", variant="primary")
clear_btn = gr.Button("Clear Form")
with gr.Column(scale=2):
output_summary = gr.Markdown(
"Your profile summary will appear here after saving.",
label="Profile Summary"
)
with gr.Row():
req_viz_matplotlib = gr.Plot(label="Requirements Progress", visible=False)
credits_viz_matplotlib = gr.Plot(label="Credits Distribution", visible=False)
def _profile_plot_updates(td):
    """Return updates for the two profile-tab plots built from transcript data ``td``.

    Each plot receives its figure as value and is shown only when a figure
    was produced.
    """
    figures = (
        data_visualizer.create_requirements_visualization(td),
        data_visualizer.create_credits_distribution_visualization(td),
    )
    return tuple(gr.update(value=fig, visible=fig is not None) for fig in figures)


def _profile_control_updates():
    """Return updates for the profile dropdown and its Load/Delete buttons.

    The profile list is fetched once; the dropdown gets it as ``choices``
    (not as its value) and all three controls are shown only when at least
    one profile exists.
    """
    profiles = profile_manager.list_profiles(session_token.value)
    has_profiles = bool(profiles)
    return (
        gr.update(choices=profiles, visible=has_profiles),
        gr.update(visible=has_profiles),
        gr.update(visible=has_profiles),
    )


# Save the profile, then refresh plots, completion state and navigation.
# ``.success`` prevents the follow-up steps from running if saving raises.
save_btn.click(
    fn=profile_manager.save_profile,
    inputs=[
        name, age, interests, transcript_data, learning_output,
        movie, movie_reason, show, show_reason,
        book, book_reason, character, character_reason, blog
    ],
    outputs=output_summary
).success(
    fn=_profile_plot_updates,
    inputs=transcript_data,
    outputs=[req_viz_matplotlib, credits_viz_matplotlib]
).success(
    # Merge into the existing completion map instead of overwriting it.
    fn=lambda status: {**(status or {}), 3: True},
    inputs=tab_completed,
    outputs=tab_completed
).success(
    fn=lambda: gr.update(elem_classes="completed-tab"),
    outputs=step4
).success(
    fn=lambda: gr.update(interactive=True),
    outputs=step5
).success(
    fn=lambda: gr.update(interactive=True),
    outputs=step6
).success(
    fn=_profile_control_updates,
    outputs=[load_profile_dropdown, load_btn, delete_btn]
)
def _load_selected_profile(profile_name):
    """Load the profile chosen in the dropdown and fan it out to the form.

    Args:
        profile_name: The current selection of ``load_profile_dropdown``.

    Returns:
        A 17-tuple in the order of the ``load_btn.click`` outputs below.

    Raises:
        gr.Error: If nothing is selected or the profile cannot be loaded.
    """
    if not profile_name:
        raise gr.Error("Please select a profile to load.")
    profile = profile_manager.load_profile(profile_name, session_token.value)
    if not profile:
        raise gr.Error("The selected profile could not be loaded.")

    favorites = profile.get('favorites', {}) or {}
    transcript = profile.get('transcript', {})
    req_fig = data_visualizer.create_requirements_visualization(transcript)
    credits_fig = data_visualizer.create_credits_distribution_visualization(transcript)
    return (
        profile.get('name', ''),
        # None (not '') so the numeric age field accepts a missing value.
        profile.get('age'),
        profile.get('interests', ''),
        profile.get('learning_style', ''),
        favorites.get('movie', ''),
        favorites.get('movie_reason', ''),
        favorites.get('show', ''),
        favorites.get('show_reason', ''),
        favorites.get('book', ''),
        favorites.get('book_reason', ''),
        favorites.get('character', ''),
        favorites.get('character_reason', ''),
        profile.get('blog', ''),
        transcript,
        gr.update(value="Profile loaded successfully!"),
        # The plots start hidden, so reveal them together with their figure.
        gr.update(value=req_fig, visible=req_fig is not None),
        gr.update(value=credits_fig, visible=credits_fig is not None),
    )


# The dropdown is passed as an input so the handler sees the user's current
# selection; reading ``load_profile_dropdown.value`` would only give the value
# the component was constructed with.
load_btn.click(
    fn=_load_selected_profile,
    inputs=load_profile_dropdown,
    outputs=[
        name, age, interests, learning_output,
        movie, movie_reason, show, show_reason,
        book, book_reason, character, character_reason,
        blog, transcript_data, output_summary,
        req_viz_matplotlib, credits_viz_matplotlib
    ]
)
with gr.Tab("AI Teaching Assistant", id=4):
gr.Markdown("## πŸ’¬ Your Personalized Teaching Assistant")
gr.Markdown("Ask educational questions about any subject. I'll guide you to discover the answers yourself.")
chatbot = gr.Chatbot(height=500)
msg = gr.Textbox(label="Your Educational Question")
clear = gr.Button("Clear Chat")
def respond(message: str, chat_history: List, profile: Dict) -> Tuple[str, List]:
    """Answer a chat message and append the exchange to the history.

    Args:
        message: The text submitted by the user.
        chat_history: Previous ``(user, assistant)`` pairs. May be ``None``,
            e.g. after the "Clear Chat" button reset the chatbot to ``None``.
        profile: Profile data forwarded to the chatbot for personalisation.

    Returns:
        A tuple ``("", history)``: the empty string clears the input box and
        ``history`` is the conversation including the new exchange.
    """
    history = list(chat_history) if chat_history else []

    # Nothing to answer: leave the conversation unchanged.
    if not message or not message.strip():
        return "", history

    response, multimedia = educational_chatbot.generate_response(message, profile)

    # Format multimedia suggestions
    if multimedia:
        resource_lines = [
            f"- [{item['type'].title()}] {item['source']}: {item['url']}\n"
            for item in multimedia
        ]
        response += "\n\n**Suggested Resources:**\n" + "".join(resource_lines)

    history.append((message, response))
    return "", history
msg.submit(
respond,
inputs=[msg, chatbot, gr.State(profile_manager.load_profile(session_token.value))],
outputs=[msg, chatbot]
)
clear.click(lambda: None, None, chatbot, queue=False)
with gr.Tab("Goals & Planning", id=5):
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("### 🎯 Step 5: Set Academic Goals")
with gr.Group():
goal_type = gr.Dropdown(
label="Goal Type",
choices=["GPA Improvement", "Course Completion", "Test Score", "Other"],
value="GPA Improvement"
)
goal_description = gr.Textbox(label="Goal Description")
goal_target_date = gr.Textbox(label="Target Date (YYYY-MM-DD)", placeholder="2025-12-31")
goal_target_value = gr.Number(label="Target Value (if applicable)", visible=False)
add_goal_btn = gr.Button("Add Goal", variant="primary")
gr.Markdown("### πŸ“… Study Calendar")
calendar_start_date = gr.Textbox(label="Calendar Start Date (YYYY-MM-DD)", value=datetime.date.today().isoformat())
generate_calendar_btn = gr.Button("Generate Study Calendar")
with gr.Column(scale=2):
gr.Markdown("### Your Goals")
goals_output = gr.HTML()
goal_viz = gr.Plot(label="Goal Progress", visible=False)
gr.Markdown("### Your Study Calendar")
calendar_output = gr.HTML()
calendar_viz = gr.Plot(label="Calendar Visualization", visible=False)
goal_type.change(
fn=lambda gt: gr.update(visible=gt in ["GPA Improvement", "Test Score"]),
inputs=goal_type,
outputs=goal_target_value
)
def update_goals_display(profile_name):
    """Render the goals stored for ``profile_name`` as HTML cards.

    Args:
        profile_name: Name under which the goals were stored.

    Returns:
        A 2-tuple ``(goals_html, plot_update)``: the HTML for the goals panel
        and an update carrying the goal-progress figure. The plot is hidden
        when there are no goals or no figure was produced.
    """
    # Local import: goal fields are typed in by the user and must be escaped
    # before being embedded in HTML.
    from html import escape

    goals = goal_tracker.get_goals(profile_name)
    if not goals:
        return (
            "<div class='alert-box'>No goals set yet. Add your first goal above!</div>",
            gr.update(visible=False)
        )

    cards = []
    for goal in goals:
        history = goal['progress']
        latest = history[-1] if history else None
        current_value = latest['value'] if latest else 0
        target = goal['target_value'] if goal['target_value'] is not None else "N/A"
        last_note = (
            f"<p><strong>Last Note:</strong> {escape(str(latest['notes']))}</p>"
            if latest else ""
        )
        cards.append(
            "\n<div class='goal-card'>\n"
            f"<h4>{escape(str(goal['description']))}</h4>\n"
            f"<p><strong>Type:</strong> {escape(str(goal['type']))}</p>\n"
            f"<p><strong>Target Date:</strong> {escape(str(goal['target_date']))}</p>\n"
            f"<p><strong>Progress:</strong> {escape(str(current_value))} / {escape(str(target))}</p>\n"
            f"{last_note}\n"
            "</div>\n"
        )

    # Build the figure once and pass it on; toggling visibility alone would
    # show an empty plot.
    figure = goal_tracker.create_goal_visualization(goals)
    return (
        "\n".join(cards),
        gr.update(value=figure, visible=figure is not None)
    )
def update_calendar_display(profile_name, start_date_str):
    """Generate the study calendar for a profile and render it as HTML.

    Args:
        profile_name: Name of the saved profile to plan for.
        start_date_str: Calendar start date in ``YYYY-MM-DD`` format.

    Returns:
        A 2-tuple ``(calendar_html, plot_update)``. On an invalid date or a
        missing profile the HTML carries a message and the plot is hidden.
    """
    # Local import: titles and descriptions are embedded in HTML below.
    from html import escape

    try:
        start_date = datetime.date.fromisoformat(start_date_str)
    except (TypeError, ValueError):
        # TypeError covers a missing (None) value, ValueError a malformed one.
        return (
            "<div class='error-message'>Invalid date format. Please use YYYY-MM-DD</div>",
            gr.update(visible=False)
        )

    profile = profile_manager.load_profile(profile_name, session_token.value)
    if not profile:
        return (
            "<div class='alert-box'>Please complete and save your profile first</div>",
            gr.update(visible=False)
        )

    # Named ``schedule`` to avoid shadowing the imported ``calendar`` module.
    schedule = study_calendar.generate_study_calendar(profile, start_date.isoformat())

    def _group_by_day(entries):
        """Index entries by their parsed ``date`` so each is parsed only once."""
        grouped = {}
        for entry in entries:
            day = datetime.date.fromisoformat(entry['date'])
            grouped.setdefault(day, []).append(entry)
        return grouped

    events_by_day = _group_by_day(schedule['events'])
    exams_by_day = _group_by_day(schedule['exams'])

    first_day = datetime.date.fromisoformat(schedule['start_date'])
    last_day = datetime.date.fromisoformat(schedule['end_date'])
    one_day = datetime.timedelta(days=1)

    calendar_html = []
    current_date = first_day
    while current_date <= last_day:
        day_events = events_by_day.get(current_date, [])
        day_exams = exams_by_day.get(current_date, [])
        if day_events or day_exams:
            calendar_html.append(f"<h4>{current_date.strftime('%A, %B %d')}</h4>")
            for event in day_events:
                calendar_html.append(
                    "\n<div class='calendar-event'>\n"
                    f"<p><strong>πŸ“š {escape(str(event['title']))}</strong></p>\n"
                    f"<p>⏱️ {escape(str(event['duration']))}</p>\n"
                    f"<p>{escape(str(event['description']))}</p>\n"
                    "</div>\n"
                )
            for exam in day_exams:
                calendar_html.append(
                    "\n<div class='calendar-event' style='border-left-color: #f44336;'>\n"
                    f"<p><strong>πŸ“ {escape(str(exam['title']))}</strong></p>\n"
                    "<p>⏰ All day</p>\n"
                    "<p>Prepare by reviewing materials and practicing problems</p>\n"
                    "</div>\n"
                )
        current_date += one_day

    # Build the figure once and pass it to the plot together with visibility.
    figure = study_calendar.create_calendar_visualization(schedule)
    return (
        "\n".join(calendar_html) if calendar_html else "<div class='alert-box'>No study sessions scheduled yet</div>",
        gr.update(value=figure, visible=figure is not None)
    )
def _add_goal_and_refresh(profile_name, gt, desc, date, val):
    """Store a new goal for ``profile_name`` and return the refreshed display.

    Returns the ``(goals_html, plot_update)`` pair produced by
    ``update_goals_display``, matching the two outputs wired below.
    """
    goal_tracker.add_goal(profile_name, gt, desc, date, val)
    return update_goals_display(profile_name)


# ``name`` is passed as an input so the handler receives the text currently in
# the box; ``name.value`` would only be the value the component was built with.
add_goal_btn.click(
    fn=_add_goal_and_refresh,
    inputs=[name, goal_type, goal_description, goal_target_date, goal_target_value],
    outputs=[goals_output, goal_viz]
)
# Pass the live ``name`` textbox as an input; reading ``name.value`` inside the
# handler would use the value the component was constructed with instead of
# what the user typed.
generate_calendar_btn.click(
    fn=update_calendar_display,
    inputs=[name, calendar_start_date],
    outputs=[calendar_output, calendar_viz]
)
def navigate_to_tab(tab_index: int, tab_completed_status: dict):
    """Switch to ``tab_index`` if every earlier step has been completed.

    Args:
        tab_index: Zero-based index of the tab the user wants to open.
        tab_completed_status: Mapping of tab index to completion flag; a
            missing key or a ``None`` mapping counts as "not completed".

    Returns:
        A 2-tuple ``(tabs_update, message_update)``. When an earlier step is
        incomplete, the first such tab is selected and an error banner is
        shown; otherwise the requested tab is selected and the banner hidden.
    """
    # NOTE(review): the visible event wiring sets completion flags only for
    # tabs 0-3, so whether tab 5 is reachable depends on the initial state of
    # key 4 - confirm.
    messages = [
        "Please complete the transcript analysis first",
        "Please complete the learning style quiz first",
        "Please fill out your personal information first",
        "Please save your profile first",
        "Please complete the previous steps first"
    ]
    completed = tab_completed_status or {}

    for step in range(tab_index):
        if completed.get(step, False):
            continue
        # Fall back to the generic message for steps beyond the list.
        message = messages[step] if step < len(messages) else messages[-1]
        return (
            gr.Tabs(selected=step),
            gr.update(
                value=f"<div class='error-message'>β›” {message}</div>",
                visible=True
            )
        )
    return gr.Tabs(selected=tab_index), gr.update(visible=False)
# Wire every step button to the shared navigation guard. A button's position
# in the tuple is the tab index it targets; the index travels through a
# per-button gr.State input.
for _tab_idx, _step_btn in enumerate((step1, step2, step3, step4, step5, step6)):
    _step_btn.click(
        lambda idx, status: navigate_to_tab(idx, status),
        inputs=[gr.State(_tab_idx), tab_completed],
        outputs=[tabs, nav_message]
    )
def toggle_dark_mode(dark):
    """Return the Soft theme for the requested mode.

    Both variants use a blue primary and gray secondary hue; the dark variant
    additionally sets the neutral hue to slate.

    Args:
        dark: Truthy to request the dark variant.

    Returns:
        A ``gr.themes.Soft`` instance.
    """
    theme_options = {"primary_hue": "blue", "secondary_hue": "gray"}
    if dark:
        theme_options["neutral_hue"] = "slate"
    return gr.themes.Soft(**theme_options)
dark_mode.change(
fn=toggle_dark_mode,
inputs=dark_mode,
outputs=None
)
app.load(fn=lambda: get_model_and_tokenizer(), outputs=[])
return app
# Build the Gradio interface when the module is imported.
app = create_enhanced_interface()

if __name__ == "__main__":
    # Started as a script: listen on all interfaces, port 7860.
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )