# Hugging Face Space status when this file was captured: Runtime error
import gradio as gr | |
import pandas as pd | |
import json | |
import os | |
import re | |
from PyPDF2 import PdfReader | |
from collections import defaultdict | |
from typing import Dict, List, Optional, Tuple, Union | |
import html | |
from pathlib import Path | |
import fitz # PyMuPDF | |
import pytesseract | |
from PIL import Image | |
import io | |
import secrets | |
import string | |
from huggingface_hub import HfApi, HfFolder | |
import torch | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
import time | |
import logging | |
import asyncio | |
from functools import lru_cache | |
import hashlib | |
from concurrent.futures import ThreadPoolExecutor | |
from pydantic import BaseModel | |
# ========== CONFIGURATION ==========
# Storage location and upload limits.
PROFILES_DIR = "student_profiles"
ALLOWED_FILE_TYPES = [".pdf", ".png", ".jpg", ".jpeg"]
MAX_FILE_SIZE_MB = 5

# Accepted age range for a student profile.
MIN_AGE = 5
MAX_AGE = 120

# Session handling.
SESSION_TOKEN_LENGTH = 32
SESSION_TIMEOUT = 3600  # 1 hour session timeout

# Hugging Face access token, read from the environment (None when unset).
HF_TOKEN = os.environ.get("HF_TOKEN")

# Initialize logging: everything from DEBUG upwards goes to a local file.
logging.basicConfig(
    filename='transcript_parser.log',
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)

# Model configuration - Using smaller model
MODEL_NAME = "deepseek-ai/deepseek-llm-1.3b"

# Initialize Hugging Face API.
# NOTE: `hf_api` is only bound when a token is configured and HfApi() succeeds;
# code in this file guards its use with `'hf_api' in globals()`.
if HF_TOKEN:
    try:
        hf_api = HfApi(token=HF_TOKEN)
        HfFolder.save_token(HF_TOKEN)
    except Exception as e:
        logging.error(f"Failed to initialize Hugging Face API: {str(e)}")
# ========== MODEL LOADER ==========
class ModelLoader:
    """Lazily load and cache the causal language model and its tokenizer.

    Attributes:
        model: The loaded model in eval mode, or None until loading succeeds.
        tokenizer: The loaded tokenizer, or None until loading succeeds.
        loaded: True once a model/tokenizer pair has been loaded successfully.
        loading: True while a load is in progress.
        error: Message of the most recent load failure, or None.
        device: "cuda" when a CUDA device is available, otherwise "cpu".
    """

    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.loaded = False
        self.loading = False
        self.error = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

    def load_model(self, progress: gr.Progress = None) -> Tuple[Optional[AutoModelForCausalLM], Optional[AutoTokenizer]]:
        """Lazy load the model with progress feedback.

        The first successful call loads MODEL_NAME and caches the result;
        later calls return the cached pair instead of loading it again.

        Args:
            progress: Optional Gradio progress callback.

        Returns:
            ``(model, tokenizer)`` on success, ``(None, None)`` on failure
            (the failure message is stored in ``self.error`` and logged).
        """
        # Reuse the already-loaded model instead of reloading on every call.
        if self.loaded and self.model is not None and self.tokenizer is not None:
            return self.model, self.tokenizer

        self.loading = True
        self.error = None
        try:
            if progress:
                progress(0.1, desc="Checking GPU availability...")
            if self.device == "cuda":
                torch.cuda.empty_cache()

            if progress:
                progress(0.2, desc="Loading tokenizer...")
            tokenizer = AutoTokenizer.from_pretrained(
                MODEL_NAME,
                trust_remote_code=True
            )

            if progress:
                progress(0.5, desc="Loading model (this may take a few minutes)...")
            model_kwargs = {
                "trust_remote_code": True,
                "torch_dtype": torch.float16 if self.device == "cuda" else torch.float32,
                "device_map": "auto" if self.device == "cuda" else None,
                "low_cpu_mem_usage": True,
                "offload_folder": "offload"
            }
            try:
                model = AutoModelForCausalLM.from_pretrained(
                    MODEL_NAME,
                    **model_kwargs
                )
            except torch.cuda.OutOfMemoryError:
                # Not enough GPU memory: retry without a device map, on the CPU.
                model_kwargs["device_map"] = None
                model = AutoModelForCausalLM.from_pretrained(
                    MODEL_NAME,
                    **model_kwargs
                ).to('cpu')
                self.device = 'cpu'

            # Smoke test: generate a single token so a broken model fails here.
            test_input = tokenizer("Test", return_tensors="pt").to(self.device)
            _ = model.generate(**test_input, max_new_tokens=1)

            self.model = model.eval()
            self.tokenizer = tokenizer
            self.loaded = True
            return model, tokenizer
        except Exception as e:
            self.error = f"Model loading failed: {str(e)}"
            logging.error(self.error)
            return None, None
        finally:
            self.loading = False
# Initialize model loader (shared, module-level instance)
model_loader = ModelLoader()


def get_model_and_tokenizer():
    """Return the ``(model, tokenizer)`` pair produced by the shared loader."""
    model_and_tokenizer = model_loader.load_model()
    return model_and_tokenizer
# ========== UTILITY FUNCTIONS ==========
def generate_session_token() -> str:
    """Return a random alphanumeric token of SESSION_TOKEN_LENGTH characters.

    Characters are drawn with ``secrets.choice`` from ASCII letters and digits.
    """
    charset = string.ascii_letters + string.digits
    picks = [secrets.choice(charset) for _ in range(SESSION_TOKEN_LENGTH)]
    return ''.join(picks)
def sanitize_input(text: str) -> str:
    """Strip markup and unexpected characters from free-text user input.

    Steps, in order:
      1. remove anything that looks like an HTML tag,
      2. drop every character outside the allowed set (word characters,
         whitespace and ``- . , ! ? @ # $ % ^ & * ( ) + =``),
      3. HTML-escape what is left.

    Escaping must happen last: escaping first would hide the tags from the
    tag regex and the character filter would then remove the ``;`` of every
    entity, leaving broken fragments such as ``&lt`` in the output.

    Args:
        text: Raw user input; ``None`` or an empty string is accepted.

    Returns:
        The sanitized text, or ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""
    cleaned = re.sub(r'<[^>]*>', '', text.strip())
    cleaned = re.sub(r'[^\w\s\-.,!?@#\$%^&*()+=]', '', cleaned)
    return html.escape(cleaned.strip())
def validate_name(name: str) -> str:
    """Validate a student name and return it with outer whitespace removed.

    Raises:
        ValueError: If the name is empty after stripping, longer than 100
            characters, or contains a digit.
    """
    cleaned = name.strip()
    if cleaned == "":
        raise ValueError("Name cannot be empty.")
    if len(cleaned) > 100:
        raise ValueError("Name is too long (maximum 100 characters).")
    has_digit = any(map(str.isdigit, cleaned))
    if has_digit:
        raise ValueError("Name cannot contain numbers.")
    return cleaned
def validate_age(age: Union[int, float, str]) -> int:
    """Convert ``age`` to an int and check it against MIN_AGE..MAX_AGE.

    The conversion and the range check are kept separate so that an
    out-of-range age reports the range message rather than being replaced by
    the generic "valid age number" message.

    Args:
        age: Age as entered by the user (number or numeric string).

    Returns:
        The age as an integer.

    Raises:
        ValueError: If ``age`` cannot be converted to an integer, or if it
            lies outside the inclusive range MIN_AGE..MAX_AGE.
    """
    try:
        age_int = int(age)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers int(float("inf")).
        raise ValueError("Please enter a valid age number.") from None
    if not MIN_AGE <= age_int <= MAX_AGE:
        raise ValueError(f"Age must be between {MIN_AGE} and {MAX_AGE}.")
    return age_int
def validate_file(file_obj) -> None:
    """Check that an uploaded file is present, of an allowed type, and small enough.

    Args:
        file_obj: Upload object exposing the file's path as ``.name``.

    Raises:
        ValueError: If no file was given, the extension is not listed in
            ALLOWED_FILE_TYPES, or the file is larger than MAX_FILE_SIZE_MB.
    """
    if not file_obj:
        raise ValueError("Please upload a file first")

    _, extension = os.path.splitext(file_obj.name)
    if extension.lower() not in ALLOWED_FILE_TYPES:
        raise ValueError(f"Invalid file type. Allowed types: {', '.join(ALLOWED_FILE_TYPES)}")

    bytes_per_mb = 1024 * 1024
    size_mb = os.path.getsize(file_obj.name) / bytes_per_mb
    if size_mb > MAX_FILE_SIZE_MB:
        raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
# ========== TEXT EXTRACTION FUNCTIONS ==========
def extract_text_from_file(file_path: str, file_ext: str) -> str:
    """Extract and clean the text of an uploaded PDF or image.

    PDFs are tried with pdfplumber first, then PyMuPDF, then OCR; images go
    straight to OCR. The result is passed through ``clean_extracted_text``.

    Args:
        file_path: Path of the file on disk.
        file_ext: Lower-case extension including the dot (e.g. ``".pdf"``).

    Returns:
        The cleaned text.

    Raises:
        gr.Error: If extraction fails or yields no text.
    """
    text = ""
    try:
        if file_ext == '.pdf':
            try:
                # First try pdfplumber for better table extraction
                import pdfplumber
                with pdfplumber.open(file_path) as pdf:
                    # extract_text() may return None for a page without text.
                    text = ''.join(
                        f"{page.extract_text() or ''}\n" for page in pdf.pages
                    )
                if not text.strip():
                    raise ValueError("PDFPlumber returned empty text")
            except Exception as e:
                logging.warning(f"PDFPlumber failed: {str(e)}. Trying PyMuPDF...")
                # Start over so partial pdfplumber output is not duplicated,
                # and close the document when done.
                with fitz.open(file_path) as doc:
                    text = ''.join(page.get_text("text") + '\n' for page in doc)
                if not text.strip():
                    logging.warning("PyMuPDF returned empty text, trying OCR fallback...")
                    text = extract_text_from_pdf_with_ocr(file_path)
        elif file_ext in ['.png', '.jpg', '.jpeg']:
            text = extract_text_with_ocr(file_path)

        text = clean_extracted_text(text)
        if not text.strip():
            raise ValueError("No text could be extracted.")
        return text
    except Exception as e:
        logging.error(f"Text extraction error: {str(e)}")
        raise gr.Error(f"Failed to extract text: {str(e)}\n\nPossible solutions:\n1. Try a different file format\n2. Ensure text is clear and not handwritten\n3. Check file size (<5MB)") from e
def extract_text_from_pdf_with_ocr(file_path: str) -> str:
    """OCR every page of a PDF and return the recognised text.

    Each page is rendered at 300 dpi, converted to grayscale, thresholded at
    140 and passed to Tesseract with a restricted character whitelist. Pages
    yielding 20 or fewer non-blank characters are skipped, and a page whose
    OCR raises is logged and skipped.

    Returns:
        The concatenated ``PAGE n:`` sections, or the literal string
        ``"No readable text found"`` when no page produced usable text.

    Raises:
        ValueError: If rendering the PDF or the overall processing fails.
    """
    try:
        import pdf2image
        rendered_pages = pdf2image.convert_from_path(file_path, dpi=300)
        custom_config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,:;()-/ '
        sections = []
        for page_no, page_img in enumerate(rendered_pages, start=1):
            # Pre-process image: grayscale, then hard threshold for contrast
            prepared = page_img.convert('L').point(lambda x: 0 if x < 140 else 255)
            try:
                page_text = pytesseract.image_to_string(prepared, config=custom_config)
            except Exception as e:
                logging.warning(f"OCR failed on page {page_no}: {str(e)}")
                continue
            if len(page_text.strip()) > 20:  # Minimum viable text
                sections.append(f"PAGE {page_no}:\n{page_text}\n\n")
        combined = ''.join(sections)
        if combined:
            return combined
        return "No readable text found"
    except Exception as e:
        raise ValueError(f"OCR processing failed: {str(e)}")
def extract_text_with_ocr(file_path: str) -> str:
    """Run Tesseract OCR on a single image file and return the text.

    The image is converted to grayscale and thresholded at 128 into a 1-bit
    image before recognition.

    Raises:
        ValueError: If the image cannot be opened or OCR fails.
    """
    try:
        # Open inside a context manager so the file handle is released;
        # convert() returns an independent, fully loaded copy.
        with Image.open(file_path) as source:
            image = source.convert('L')
        image = image.point(lambda x: 0 if x < 128 else 255, '1')
        custom_config = r'--oem 3 --psm 6'
        return pytesseract.image_to_string(image, config=custom_config)
    except Exception as e:
        raise ValueError(f"OCR processing failed: {str(e)}") from e
# Single-character fixes applied to extracted text: pipe -> capital I,
# curly quotes -> ASCII quotes, and the "fi"/"fl" typographic ligatures
# (U+FB01 / U+FB02) -> their two-letter spelling.
_EXTRACTED_TEXT_FIXES = str.maketrans({
    '|': 'I',
    '\u2018': "'",
    '\u2019': "'",
    '\u201c': '"',
    '\u201d': '"',
    '\ufb01': 'fi',
    '\ufb02': 'fl',
})


def clean_extracted_text(text: str) -> str:
    """Normalise whitespace and common extraction artefacts in ``text``.

    Every run of whitespace (including newlines) is collapsed to one space,
    the result is stripped, and the characters in ``_EXTRACTED_TEXT_FIXES``
    are replaced.

    NOTE(review): because newlines and ``|`` are removed here, any later
    parsing that relies on line breaks or pipe-delimited columns will not see
    them in text that went through this function - confirm that is intended.

    Args:
        text: Raw text from a PDF extractor or OCR.

    Returns:
        The cleaned single-line text.
    """
    collapsed = re.sub(r'\s+', ' ', text).strip()
    return collapsed.translate(_EXTRACTED_TEXT_FIXES)
def remove_sensitive_info(text: str) -> str:
    """Mask SSN-like numbers, long numeric IDs and e-mail addresses.

    Replacements, applied in this order:
      * ``ddd-dd-dddd``              -> ``[REDACTED]``
      * a standalone 6-9 digit run   -> ``[ID]``
      * an e-mail address            -> ``[EMAIL]``

    The e-mail pattern accepts digits anywhere in the domain and only
    letters in the top-level domain.
    """
    text = re.sub(r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED]', text)
    text = re.sub(r'\b\d{6,9}\b', '[ID]', text)
    text = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '[EMAIL]', text)
    return text
# ========== TRANSCRIPT PARSING ==========
# One course row of a transcript. Every field is declared as a string.
class Course(BaseModel):
    requirement: str
    school_year: str
    grade_level: str
    course_code: str
    description: str
    term: str
    district_number: str
    fg: str  # NOTE(review): presumably the final grade column - confirm
    included: str
    credits: str
# Student header fields, per-requirement numbers, course rows and assessment
# results of one transcript, as a single pydantic model.
class GraduationProgress(BaseModel):
    # Student identification and summary figures
    student_name: str
    student_id: str
    current_grade: str
    year_of_graduation: str
    unweighted_gpa: float
    weighted_gpa: float
    community_service_hours: int
    community_service_date: str
    total_credits_earned: float
    virtual_grade: str
    # Requirement code -> {figure name -> value}
    requirements: Dict[str, Dict[str, float]]
    courses: List[Course]
    # Assessment name -> status
    assessments: Dict[str, str]
class TranscriptParser:
    """Turn extracted transcript text into a structured dictionary.

    Two strategies are available: a detailed parser that looks for student
    header fields, pipe-delimited requirement/assessment rows and course
    lines, and a simplified fallback that only extracts course rows.
    """

    # (result key, regex with one capture group, converter) for the scalar
    # student fields read by the detailed parser, in output order.
    _STUDENT_FIELDS = (
        ('grade', r"Current Grade: (\d+)", str),
        ('year_of_graduation', r"YOG (\d{4})", str),
        ('unweighted_gpa', r"Un-weighted GPA (\d+\.\d+)", float),
        ('weighted_gpa', r"Weighted GPA (\d+\.\d+)", float),
        ('community_service_hours', r"Comm Serv Hours (\d+)", int),
        ('community_service_date', r"Comm Serv Date (\d{2}/\d{2}/\d{4})", str),
        ('total_credits', r"Total Credits Earned (\d+\.\d+)", float),
        ('virtual_grade', r"Virtual Grade (\w+)", str),
    )

    # Course row: code, description, letter grade, credits.
    _COURSE_LINE = r'([A-Z]{2,4}\s?\d{3}[A-Z]?)\s+(.*?)\s+([A-F][+-]?)\s+(\d+\.?\d*)'

    def __init__(self):
        self.student_data = {}
        self.requirements = {}
        self.current_courses = []
        self.course_history = []
        self.graduation_status = {}

    def parse_transcript(self, text: str) -> Dict:
        """Parse transcript text and return structured data.

        The detailed parser is tried first; the simplified parser is used
        only when the detailed one returns a falsy result (it returns None
        when it raises internally).

        Raises:
            ValueError: If neither parser can handle the text.
        """
        try:
            parsed_data = self._parse_detailed_transcript(text)
            if parsed_data:
                return parsed_data
            # Fall back to simplified parser if detailed parsing fails
            return self._parse_simplified_transcript(text)
        except Exception as e:
            logging.error(f"Error parsing transcript: {str(e)}")
            raise ValueError(f"Couldn't parse transcript: {str(e)}") from e

    def _parse_detailed_transcript(self, text: str) -> Optional[Dict]:
        """Parse detailed transcript format.

        Returns:
            A dict with the keys ``student_info``, ``requirements``,
            ``course_history`` and ``assessments`` (sections that found
            nothing stay empty), or None when parsing raised.
        """
        try:
            parsed_data = {
                'student_info': {},
                'requirements': {},
                'course_history': [],
                'assessments': {}
            }
            student_info = parsed_data['student_info']

            # Extract student info: "<7-digit id> - <name>" header first
            student_info_match = re.search(r"(\d{7}) - (.*?)\n", text)
            if student_info_match:
                student_info['id'] = student_info_match.group(1)
                student_info['name'] = student_info_match.group(2).strip()
            for key, pattern, convert in self._STUDENT_FIELDS:
                field_match = re.search(pattern, text)
                if field_match:
                    student_info[key] = convert(field_match.group(1))

            # Extract requirements: code | description | required | waived | completed | NN %
            req_pattern = re.compile(r"([A-Z]-.*?)\s*\|\s*(.*?)\s*\|\s*(\d+\.\d+)\s*\|\s*(\d+\.\d+)\s*\|\s*(\d+\.\d+)\s*\|\s*(\d+) %")
            for match in req_pattern.finditer(text):
                code = match.group(1).strip()
                parsed_data['requirements'][code] = {
                    "description": match.group(2).strip(),
                    "required": float(match.group(3)),
                    "waived": float(match.group(4)),
                    "completed": float(match.group(5)),
                    "percent_complete": float(match.group(6))
                }

            # Extract assessments
            assess_pattern = re.compile(r"Z-Assessment: (.*?)\s*\|\s*(.*?)\s*\|\s*(\w+)\s*\|\s*(\d+) %")
            for match in assess_pattern.finditer(text):
                name = f"Assessment: {match.group(1)}"
                parsed_data['assessments'][name] = match.group(3)

            for z_item in ["Community Service Hours", "GPA"]:
                # Search once and reuse the match.
                z_match = re.search(fr"Z-{z_item.replace(' ', '.*?')}\s*\|\s*(.*?)\s*\|\s*(\w+)\s*\|\s*(\d+) %", text)
                if z_match:
                    parsed_data['assessments'][z_item] = z_match.group(2)

            # Extract courses (simplified for now - can be enhanced)
            course_pattern = r'([A-Z]{2,4}\s?\d{3})\s+(.*?)\s+([A-F][+-]?)\s+([0-9.]+)'
            for course in re.findall(course_pattern, text):
                parsed_data['course_history'].append({
                    'course_code': course[0],
                    'description': course[1],
                    'grade': course[2],
                    'credits': float(course[3])
                })
            return parsed_data
        except Exception as e:
            logging.warning(f"Detailed transcript parsing failed, falling back to simple parser: {str(e)}")
            return None

    def _parse_simplified_transcript(self, text: str) -> Dict:
        """Fallback simplified transcript parser with multiple pattern attempts.

        Patterns are tried in order ('table', 'line', 'minimal'); the first
        one that yields course rows wins. The 'minimal' pattern has no course
        code group, so its rows get an empty ``course_code``.

        Returns:
            ``{'course_history': [...]}`` with one dict per course.

        Raises:
            ValueError: If no pattern finds any course row.
        """
        patterns = [
            (r'(?:Course|Subject)\s*Code.*?Grade.*?Credits(.*?)(?:\n\s*\n|\Z)', 'table'),
            (self._COURSE_LINE, 'line'),
            (r'(.*?)\s+([A-F][+-]?)\s+(\d+\.?\d*)', 'minimal')
        ]
        for pattern, pattern_type in patterns:
            try:
                if pattern_type == 'table':
                    # Parse tabular data: locate the table body, then its rows
                    table_match = re.search(pattern, text, re.DOTALL)
                    if table_match is None:
                        continue
                    courses = re.findall(self._COURSE_LINE, table_match.group(1))
                else:
                    courses = re.findall(pattern, text)
                if not courses:
                    continue

                history = []
                for course in courses:
                    if len(course) == 4:
                        code, description, grade, credits = course
                    else:
                        # 'minimal' rows are (description, grade, credits).
                        code = ''
                        description, grade, credits = course
                    history.append({
                        'course_code': code.strip(),
                        'description': description.strip(),
                        'grade': grade.strip(),
                        'credits': float(credits)
                    })
                return {'course_history': history}
            except (ValueError, re.error) as e:
                logging.warning(f"Simplified pattern '{pattern_type}' failed: {str(e)}")
                continue
        raise ValueError("Could not identify course information in transcript")
def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
    """Process transcript file and return simple confirmation.

    Validates the upload, extracts its text, parses it with
    ``TranscriptParser`` and builds a short confirmation message. The
    message includes a GPA when the parsed student info carries one under
    ``gpa``, ``weighted_gpa`` or ``unweighted_gpa`` (first one present).

    Args:
        file_obj: Uploaded file object exposing its path as ``.name``.
        progress: Gradio progress tracker.

    Returns:
        ``(confirmation message, parsed transcript data)``.

    Raises:
        gr.Error: On any validation, extraction or parsing failure.
    """
    try:
        if not file_obj:
            raise gr.Error("Please upload a transcript file first (PDF or image)")
        validate_file(file_obj)
        file_ext = os.path.splitext(file_obj.name)[1].lower()

        # Additional PDF validation
        if file_ext == '.pdf':
            try:
                with open(file_obj.name, 'rb') as f:
                    PdfReader(f)  # Test if PDF is readable
            except Exception as e:
                raise gr.Error(f"Invalid PDF file: {str(e)}. Please upload a non-corrupted PDF.") from e

        if progress:
            progress(0.2, desc="Extracting text from file...")
        try:
            text = extract_text_from_file(file_obj.name, file_ext)
        except Exception as e:
            raise ValueError(f"Failed to extract text: {str(e)}. The file may be corrupted or in an unsupported format.") from e
        if not text.strip():
            raise ValueError("The file appears to be empty or contains no readable text.")

        if progress:
            progress(0.5, desc="Parsing transcript...")
        parser = TranscriptParser()
        try:
            parsed_data = parser.parse_transcript(text)
        except Exception as e:
            raise ValueError(f"Couldn't parse transcript content. Error: {str(e)}") from e

        confirmation = "Transcript processed successfully."
        # The parser stores GPAs as weighted_gpa / unweighted_gpa; a plain
        # 'gpa' key is still honoured first if some producer provides it.
        student_info = parsed_data.get('student_info', {})
        for gpa_key in ('gpa', 'weighted_gpa', 'unweighted_gpa'):
            if gpa_key in student_info:
                confirmation += f"\nGPA detected: {student_info[gpa_key]}"
                break
        return confirmation, parsed_data
    except Exception as e:
        error_msg = f"Error processing transcript: {str(e)}"
        logging.error(error_msg)
        raise gr.Error(f"{error_msg}\n\nPossible solutions:\n1. Try a different file format\n2. Ensure text is clear and not handwritten\n3. Check file size (<5MB)") from e
# ========== LEARNING STYLE QUIZ ==========
class LearningStyleQuiz:
    """Twenty-question learning-style quiz with scoring and a Markdown report.

    Attributes:
        questions: The question prompts.
        options: For each question, four answer options. Every option ends
            with its learning style in parentheses; the order of styles
            differs from question to question.
        learning_styles: Per style, a description, study tips and careers.
    """

    def __init__(self):
        self.questions = [
            "When you study for a test, you prefer to:",
            "When you need directions to a new place, you prefer:",
            "When you learn a new skill, you prefer to:",
            "When you're trying to concentrate, you:",
            "When you meet new people, you remember them by:",
            "When you're assembling furniture or a gadget, you:",
            "When choosing a restaurant, you rely most on:",
            "When you're in a waiting room, you typically:",
            "When giving someone instructions, you tend to:",
            "When you're trying to recall information, you:",
            "When you're at a museum or exhibit, you:",
            "When you're learning a new language, you prefer:",
            "When you're taking notes in class, you:",
            "When you're explaining something complex, you:",
            "When you're at a party, you enjoy:",
            "When you're trying to remember a phone number, you:",
            "When you're relaxing, you prefer to:",
            "When you're learning to use new software, you:",
            "When you're giving a presentation, you rely on:",
            "When you're solving a difficult problem, you:"
        ]
        self.options = [
            ["Read the textbook (Reading/Writing)", "Listen to lectures (Auditory)", "Use diagrams/charts (Visual)", "Practice problems (Kinesthetic)"],
            ["Look at a map (Visual)", "Have someone tell you (Auditory)", "Write down directions (Reading/Writing)", "Try walking/driving there (Kinesthetic)"],
            ["Read instructions (Reading/Writing)", "Have someone show you (Visual)", "Listen to explanations (Auditory)", "Try it yourself (Kinesthetic)"],
            ["Need quiet (Reading/Writing)", "Need background noise (Auditory)", "Need to move around (Kinesthetic)", "Need visual stimulation (Visual)"],
            ["Their face (Visual)", "Their name (Auditory)", "What you talked about (Reading/Writing)", "What you did together (Kinesthetic)"],
            ["Read the instructions carefully (Reading/Writing)", "Look at the diagrams (Visual)", "Ask someone to explain (Auditory)", "Start putting pieces together (Kinesthetic)"],
            ["Online photos of the food (Visual)", "Recommendations from friends (Auditory)", "Reading the menu online (Reading/Writing)", "Remembering how it felt to eat there (Kinesthetic)"],
            ["Read magazines (Reading/Writing)", "Listen to music (Auditory)", "Watch TV (Visual)", "Fidget or move around (Kinesthetic)"],
            ["Write them down (Reading/Writing)", "Explain verbally (Auditory)", "Demonstrate (Visual)", "Guide them physically (Kinesthetic)"],
            ["See written words in your mind (Visual)", "Hear the information in your head (Auditory)", "Write it down to remember (Reading/Writing)", "Associate it with physical actions (Kinesthetic)"],
            ["Read all the descriptions (Reading/Writing)", "Listen to audio guides (Auditory)", "Look at the displays (Visual)", "Touch interactive exhibits (Kinesthetic)"],
            ["Study grammar rules (Reading/Writing)", "Listen to native speakers (Auditory)", "Use flashcards with images (Visual)", "Practice conversations (Kinesthetic)"],
            ["Write detailed paragraphs (Reading/Writing)", "Record the lecture (Auditory)", "Draw diagrams and charts (Visual)", "Doodle while listening (Kinesthetic)"],
            ["Write detailed steps (Reading/Writing)", "Explain verbally with examples (Auditory)", "Draw diagrams (Visual)", "Use physical objects to demonstrate (Kinesthetic)"],
            ["Conversations with people (Auditory)", "Watching others or the environment (Visual)", "Writing notes or texting (Reading/Writing)", "Dancing or physical activities (Kinesthetic)"],
            ["See the numbers in your head (Visual)", "Say them aloud (Auditory)", "Write them down (Reading/Writing)", "Dial them on a keypad (Kinesthetic)"],
            ["Read a book (Reading/Writing)", "Listen to music (Auditory)", "Watch TV/movies (Visual)", "Do something physical (Kinesthetic)"],
            ["Read the manual (Reading/Writing)", "Ask someone to show you (Visual)", "Call tech support (Auditory)", "Experiment with the software (Kinesthetic)"],
            ["Detailed notes (Reading/Writing)", "Verbal explanations (Auditory)", "Visual slides (Visual)", "Physical demonstrations (Kinesthetic)"],
            ["Write out possible solutions (Reading/Writing)", "Talk through it with someone (Auditory)", "Draw diagrams (Visual)", "Build a model or prototype (Kinesthetic)"]
        ]
        self.learning_styles = {
            "Visual": {
                "description": "Visual learners prefer using images, diagrams, and spatial understanding.",
                "tips": [
                    "Use color coding in your notes",
                    "Create mind maps and diagrams",
                    "Watch educational videos",
                    "Use flashcards with images",
                    "Highlight important information in different colors"
                ],
                "careers": [
                    "Graphic Designer", "Architect", "Photographer",
                    "Engineer", "Surgeon", "Pilot"
                ]
            },
            "Auditory": {
                "description": "Auditory learners learn best through listening and speaking.",
                "tips": [
                    "Record lectures and listen to them",
                    "Participate in study groups",
                    "Explain concepts out loud to yourself",
                    "Use rhymes or songs to remember information",
                    "Listen to educational podcasts"
                ],
                "careers": [
                    "Musician", "Journalist", "Lawyer",
                    "Psychologist", "Teacher", "Customer Service"
                ]
            },
            "Reading/Writing": {
                "description": "These learners prefer information displayed as words.",
                "tips": [
                    "Write detailed notes",
                    "Create summaries in your own words",
                    "Read textbooks and articles",
                    "Make lists to organize information",
                    "Rewrite your notes to reinforce learning"
                ],
                "careers": [
                    "Writer", "Researcher", "Editor",
                    "Accountant", "Programmer", "Historian"
                ]
            },
            "Kinesthetic": {
                "description": "Kinesthetic learners learn through movement and hands-on activities.",
                "tips": [
                    "Use hands-on activities",
                    "Take frequent movement breaks",
                    "Create physical models",
                    "Associate information with physical actions",
                    "Study while walking or pacing"
                ],
                "careers": [
                    "Athlete", "Chef", "Mechanic",
                    "Dancer", "Physical Therapist", "Carpenter"
                ]
            }
        }

    def _style_for_option(self, option: str) -> Optional[str]:
        """Return the learning style named in an option's trailing ``(Style)`` tag."""
        for style in self.learning_styles:
            if option.endswith(f"({style})"):
                return style
        return None

    def evaluate_quiz(self, *answers) -> str:
        """Evaluate quiz answers and return learning style results.

        Each answer is credited to the style named in its own ``(Style)``
        suffix. The position of an option inside its list is NOT used,
        because the option lists are not ordered consistently.

        Args:
            *answers: One selected option (or a falsy value for "skipped")
                per question, in question order.

        Returns:
            A Markdown report with scores, the primary style(s), study
            strategies and career suggestions.

        Raises:
            gr.Error: If the number of answers differs from the number of
                questions, or if every answer is empty.
        """
        answers = list(answers)
        if len(answers) != len(self.questions):
            raise gr.Error("Please answer all questions before submitting")

        scores = {style: 0 for style in self.learning_styles}
        for question_options, answer in zip(self.options, answers):
            # Only answers that are real options of this question can score.
            if not answer or answer not in question_options:
                continue
            style = self._style_for_option(answer)
            if style is not None:
                scores[style] += 1

        total_answered = sum(1 for ans in answers if ans)
        if total_answered == 0:
            raise gr.Error("No answers provided")

        percentages = {style: (score/total_answered)*100 for style, score in scores.items()}
        sorted_styles = sorted(scores.items(), key=lambda x: x[1], reverse=True)

        result = "## Your Learning Style Results\n\n"
        result += "### Scores:\n"
        for style, score in sorted_styles:
            result += f"- **{style}**: {score}/{total_answered} ({percentages[style]:.1f}%)\n"

        max_score = max(scores.values())
        primary_styles = [style for style, score in scores.items() if score == max_score]

        result += "\n### Analysis:\n"
        if len(primary_styles) == 1:
            primary_style = primary_styles[0]
            style_info = self.learning_styles[primary_style]
            result += f"Your primary learning style is **{primary_style}**\n\n"
            result += f"**{primary_style} Characteristics**:\n"
            result += f"{style_info['description']}\n\n"
            result += "**Recommended Study Strategies**:\n"
            for tip in style_info['tips']:
                result += f"- {tip}\n"
            result += "\n**Potential Career Paths**:\n"
            for career in style_info['careers'][:6]:
                result += f"- {career}\n"
            # Highest-scoring style other than the primary one.
            complementary = [s for s in sorted_styles if s[0] != primary_style][0][0]
            result += f"\nYou might also benefit from some **{complementary}** strategies:\n"
            for tip in self.learning_styles[complementary]['tips'][:3]:
                result += f"- {tip}\n"
        else:
            result += "You have multiple strong learning styles:\n"
            for style in primary_styles:
                result += f"- **{style}**\n"
            result += "\n**Combined Learning Strategies**:\n"
            result += "You may benefit from combining different learning approaches:\n"
            for style in primary_styles:
                result += f"\n**{style}** techniques:\n"
                for tip in self.learning_styles[style]['tips'][:2]:
                    result += f"- {tip}\n"
                result += f"\n**{style}** career suggestions:\n"
                for career in self.learning_styles[style]['careers'][:3]:
                    result += f"- {career}\n"
        return result


learning_style_quiz = LearningStyleQuiz()
# ========== PROFILE MANAGEMENT ==========
class ProfileManager:
    """Save, load and list student profiles stored as JSON files.

    Profiles live in PROFILES_DIR. When a session token is in use the file
    name is ``<sha256(name)[:16]>_<token>_profile.json``; without one it is
    ``<name with spaces replaced by underscores>_profile.json``.
    """

    # Phrases that appear in the learning-style quiz report; their presence
    # shows the quiz was completed (single-style and multi-style reports).
    _QUIZ_MARKERS = (
        "Your primary learning style is",
        "You have multiple strong learning styles",
    )

    def __init__(self):
        self.profiles_dir = Path(PROFILES_DIR)
        self.profiles_dir.mkdir(exist_ok=True, parents=True)
        self.current_session = None

    def set_session(self, session_token: str) -> None:
        """Remember ``session_token`` as the session used for saving."""
        self.current_session = session_token

    @staticmethod
    def _profile_filename(name: str, session_token: Optional[str]) -> str:
        """Build the profile file name for ``name`` (one rule for save and load)."""
        if session_token:
            name_hash = hashlib.sha256(name.encode()).hexdigest()[:16]
            return f"{name_hash}_{session_token}_profile.json"
        return f"{name.replace(' ', '_')}_profile.json"

    def get_profile_path(self, name: str) -> Path:
        """Return the path of ``name``'s profile for the current session."""
        return self.profiles_dir / self._profile_filename(name, self.current_session)

    def save_profile(self, name: str, age: Union[int, str], interests: str,
                     transcript: Dict, learning_style: str,
                     movie: str, movie_reason: str, show: str, show_reason: str,
                     book: str, book_reason: str, character: str, character_reason: str,
                     blog: str) -> str:
        """Validate the inputs and write the profile to disk.

        When HF_TOKEN is set and ``hf_api`` exists, the file is also uploaded
        to the Hub; an upload failure is logged and does not fail the save.

        Returns:
            A confirmation message, including the GPA when the transcript's
            student info has one.

        Raises:
            gr.Error: If validation or writing fails.
        """
        try:
            name = validate_name(name)
            age = validate_age(age)
            if not interests.strip():
                raise ValueError("Please describe at least one interest or hobby.")
            if not transcript:
                raise ValueError("Please complete the transcript analysis first.")
            # Accept both the single-style report ("... style is **X**") and
            # the multi-style report; requiring a colon after "is" would
            # reject the quiz's own output.
            if not learning_style or not any(marker in learning_style for marker in self._QUIZ_MARKERS):
                raise ValueError("Please complete the learning style quiz first.")

            favorites = {
                "movie": sanitize_input(movie),
                "movie_reason": sanitize_input(movie_reason),
                "show": sanitize_input(show),
                "show_reason": sanitize_input(show_reason),
                "book": sanitize_input(book),
                "book_reason": sanitize_input(book_reason),
                "character": sanitize_input(character),
                "character_reason": sanitize_input(character_reason)
            }
            data = {
                "name": name,
                "age": age,
                "interests": sanitize_input(interests),
                "transcript": transcript,
                "learning_style": learning_style,
                "favorites": favorites,
                "blog": sanitize_input(blog) if blog else "",
                "session_token": self.current_session,
                "last_updated": time.time()
            }

            filepath = self.get_profile_path(name)
            with open(filepath, "w", encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)

            if HF_TOKEN and 'hf_api' in globals():
                try:
                    hf_api.upload_file(
                        path_or_fileobj=filepath,
                        path_in_repo=f"profiles/{filepath.name}",
                        repo_id="your-username/student-learning-assistant",
                        repo_type="dataset"
                    )
                except Exception as e:
                    logging.error(f"Failed to upload to HF Hub: {str(e)}")

            # Return simple confirmation with GPA if available. The parser
            # stores weighted_gpa / unweighted_gpa; 'gpa' is honoured first.
            confirmation = f"Profile saved successfully for {name}."
            student_info = data.get('transcript', {}).get('student_info', {})
            for gpa_key in ('gpa', 'weighted_gpa', 'unweighted_gpa'):
                if gpa_key in student_info:
                    confirmation += f"\nGPA: {student_info[gpa_key]}"
                    break
            return confirmation
        except Exception as e:
            logging.error(f"Profile validation error: {str(e)}")
            raise gr.Error(f"Couldn't save profile: {str(e)}") from e

    def load_profile(self, name: str = None, session_token: str = None) -> Dict:
        """Load a stored profile.

        Args:
            name: Profile owner; when omitted the first matching file is used.
            session_token: Restrict the lookup to this session's files.

        Returns:
            The profile dict, or ``{}`` when nothing matches or any error
            occurs (missing profile, expired session, unreadable file). The
            error is logged, not raised.
        """
        try:
            if session_token:
                profile_pattern = f"*{session_token}_profile.json"
            else:
                profile_pattern = "*.json"
            profiles = list(self.profiles_dir.glob(profile_pattern))
            if not profiles:
                return {}

            if name:
                # Same naming rule as get_profile_path, so saved files are found.
                profile_file = self.profiles_dir / self._profile_filename(name, session_token)
                if not profile_file.exists():
                    if HF_TOKEN and 'hf_api' in globals():
                        try:
                            # hf_hub_download returns the local path of the
                            # fetched file (placed below local_dir).
                            downloaded = hf_api.hf_hub_download(
                                repo_id="your-username/student-learning-assistant",
                                filename=f"profiles/{profile_file.name}",
                                repo_type="dataset",
                                local_dir=self.profiles_dir
                            )
                            profile_file = Path(downloaded)
                        except Exception as e:
                            raise gr.Error(f"No profile found for {name}") from e
                    else:
                        raise gr.Error(f"No profile found for {name}")
            else:
                profile_file = profiles[0]

            with open(profile_file, "r", encoding='utf-8') as f:
                profile_data = json.load(f)
            if time.time() - profile_data.get('last_updated', 0) > SESSION_TIMEOUT:
                raise gr.Error("Session expired. Please start a new session.")
            return profile_data
        except Exception as e:
            logging.error(f"Error loading profile: {str(e)}")
            return {}

    def list_profiles(self, session_token: str = None) -> List[str]:
        """Return the names stored in the profile files.

        Files that are not valid JSON are skipped; a profile without a
        ``name`` entry is listed by its file stem.
        """
        if session_token:
            profiles = list(self.profiles_dir.glob(f"*{session_token}_profile.json"))
        else:
            profiles = list(self.profiles_dir.glob("*.json"))

        profile_names = []
        for p in profiles:
            with open(p, "r", encoding='utf-8') as f:
                try:
                    data = json.load(f)
                except json.JSONDecodeError:
                    continue
            profile_names.append(data.get('name', p.stem))
        return profile_names
# Shared, module-level profile manager instance.
profile_manager = ProfileManager()
# ========== AI TEACHING ASSISTANT ==========
class TeachingAssistant:
    """Rule-based chat assistant that answers from the saved student profile.

    Attributes:
        context_history: Recent conversation as ``{"role", "content"}``
            dicts in chronological order, newest last.
        max_context_length: Number of history exchanges kept; the context
            holds at most twice that many messages.
    """

    def __init__(self):
        self.context_history = []
        self.max_context_length = 5

    async def generate_response(self, message: str, history: List[List[Union[str, None]]], session_token: str) -> str:
        """Answer ``message`` using the profile stored for ``session_token``.

        Returns:
            A GPA answer when the message mentions "gpa", a generic greeting
            otherwise, a prompt to save a profile when none is found, or an
            apology when an error occurs.
        """
        try:
            profile = profile_manager.load_profile(session_token=session_token)
            if not profile:
                return "Please complete and save your profile first."
            self._update_context(message, history)

            # Focus on GPA if mentioned
            if "gpa" in message.lower():
                # The transcript parser stores weighted_gpa / unweighted_gpa;
                # a plain 'gpa' entry is used first when present.
                student_info = profile.get("transcript", {}).get("student_info", {})
                gpa = next(
                    (student_info[key] for key in ("gpa", "weighted_gpa", "unweighted_gpa")
                     if key in student_info),
                    "unknown",
                )
                return f"Your GPA is {gpa}. Would you like advice on improving it?"

            # Generic response otherwise
            return "I'm your learning assistant. Ask me about your GPA, courses, or study tips."
        except Exception as e:
            logging.error(f"Error generating response: {str(e)}")
            return "I encountered an error. Please try again."

    def _update_context(self, message: str, history: List[List[Union[str, None]]]) -> None:
        """Rebuild ``context_history`` from ``history`` plus the new message.

        The context is rebuilt on every call, so earlier exchanges are not
        duplicated, and the current message always comes last. Only the most
        recent ``max_context_length * 2`` messages are kept.

        Args:
            message: The user's new message.
            history: Prior exchanges as ``[user_text, assistant_text]`` pairs;
                either element may be None/empty.
        """
        context = []
        if history:
            for user_text, assistant_text in (h[:2] for h in history[-self.max_context_length:]):
                if user_text:
                    context.append({"role": "user", "content": user_text})
                if assistant_text:
                    context.append({"role": "assistant", "content": assistant_text})
        context.append({"role": "user", "content": message})
        self.context_history = context[-(self.max_context_length * 2):]


teaching_assistant = TeachingAssistant()
# ========== GRADIO INTERFACE ========== | |
def create_interface():
    """Build the five-step Gradio Blocks application and return it.

    The UI is a wizard: transcript upload, learning-style quiz, personal
    profile, save/review, and a chat assistant. A ``tab_completed`` state dict
    (tab index -> bool) records which steps are done; the numbered step buttons
    use it to decide whether the user may move forward.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
        # NOTE(review): the token is generated once, while the interface is
        # built, and callbacks below read `session_token.value` (that build-time
        # value). It therefore looks shared by every visitor - confirm whether
        # per-user tokens are intended.
        session_token = gr.State(value=generate_session_token())
        profile_manager.set_session(session_token.value)
        tab_completed = gr.State({
            0: False,  # Transcript Upload
            1: False,  # Learning Style Quiz
            2: False,  # Personal Questions
            3: False,  # Save & Review
            4: False   # AI Assistant
        })

        def mark_completed(tab_id):
            """Return a handler that flags ``tab_id`` done and keeps the other flags."""
            def _handler(status):
                updated = dict(status or {})
                updated[tab_id] = True
                return updated
            return _handler

        # Custom CSS
        app.css = """
        .gradio-container { max-width: 1200px !important; margin: 0 auto !important; }
        .tab-content { padding: 20px !important; border: 1px solid #e0e0e0 !important; border-radius: 8px !important; margin-top: 10px !important; }
        .completed-tab { background: #4CAF50 !important; color: white !important; }
        .incomplete-tab { background: #E0E0E0 !important; }
        .nav-message { padding: 10px; margin: 10px 0; border-radius: 4px; background-color: #ffebee; color: #c62828; }
        .file-upload { border: 2px dashed #4CAF50 !important; padding: 20px !important; border-radius: 8px !important; text-align: center; }
        .file-upload:hover { background: #f5f5f5; }
        .progress-bar { height: 5px; background: linear-gradient(to right, #4CAF50, #8BC34A); margin-bottom: 15px; border-radius: 3px; }
        .quiz-question { margin-bottom: 15px; padding: 15px; background: #f5f5f5; border-radius: 5px; }
        .quiz-results { margin-top: 20px; padding: 20px; background: #e8f5e9; border-radius: 8px; }
        .error-message { color: #d32f2f; background-color: #ffebee; padding: 10px; border-radius: 4px; margin: 10px 0; }
        .transcript-results { border-left: 4px solid #4CAF50 !important; padding: 15px !important; background: #f8f8f8 !important; }
        .error-box { border: 1px solid #ff4444 !important; background: #fff8f8 !important; }
        .dark .tab-content { background-color: #2d2d2d !important; border-color: #444 !important; }
        .dark .quiz-question { background-color: #3d3d3d !important; }
        .dark .quiz-results { background-color: #2e3d2e !important; }
        .dark textarea, .dark input { background-color: #333 !important; color: #eee !important; }
        .dark .output-markdown { color: #eee !important; }
        .dark .chatbot { background-color: #333 !important; }
        .dark .chatbot .user, .dark .chatbot .assistant { color: #eee !important; }
        """

        # Header
        with gr.Row():
            with gr.Column(scale=4):
                gr.Markdown("""
                # Student Learning Assistant
                **Your personalized education companion**
                Complete each step to get customized learning recommendations.
                """)
            with gr.Column(scale=1):
                dark_mode = gr.Checkbox(label="Dark Mode", value=False)

        # Navigation buttons
        with gr.Row():
            with gr.Column(scale=1, min_width=100):
                step1 = gr.Button("1. Transcript", elem_classes="incomplete-tab")
            with gr.Column(scale=1, min_width=100):
                step2 = gr.Button("2. Quiz", elem_classes="incomplete-tab", interactive=False)
            with gr.Column(scale=1, min_width=100):
                step3 = gr.Button("3. Profile", elem_classes="incomplete-tab", interactive=False)
            with gr.Column(scale=1, min_width=100):
                step4 = gr.Button("4. Review", elem_classes="incomplete-tab", interactive=False)
            with gr.Column(scale=1, min_width=100):
                step5 = gr.Button("5. Assistant", elem_classes="incomplete-tab", interactive=False)
        nav_message = gr.HTML(visible=False)

        # Main tabs
        with gr.Tabs(visible=True) as tabs:
            # ===== TAB 1: TRANSCRIPT UPLOAD =====
            with gr.Tab("Transcript", id=0):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Step 1: Upload Your Transcript")
                        with gr.Group(elem_classes="file-upload"):
                            file_input = gr.File(
                                label="Drag and drop your transcript here (PDF or Image)",
                                file_types=ALLOWED_FILE_TYPES,
                                type="filepath"
                            )
                        upload_btn = gr.Button("Analyze Transcript", variant="primary")
                        file_error = gr.HTML(visible=False)
                    with gr.Column(scale=2):
                        transcript_output = gr.Textbox(
                            label="Analysis Results",
                            lines=5,
                            interactive=False,
                            elem_classes="transcript-results"
                        )
                        transcript_data = gr.State()

                def on_file_change(f):
                    """Hide any previous error and show a hint once a file is chosen."""
                    if f:
                        output_update = gr.update(value="File ready for analysis!", visible=True)
                    else:
                        output_update = gr.update(value="Please upload a file", visible=False)
                    return gr.update(visible=False), output_update

                file_input.change(
                    fn=on_file_change,
                    inputs=file_input,
                    outputs=[file_error, transcript_output]
                )
                upload_btn.click(
                    fn=parse_transcript,
                    inputs=[file_input, tab_completed],
                    outputs=[transcript_output, transcript_data]
                ).then(
                    # Merge into the existing flags instead of replacing the dict.
                    fn=mark_completed(0),
                    inputs=tab_completed,
                    outputs=tab_completed
                ).then(
                    fn=lambda: gr.update(elem_classes="completed-tab"),
                    outputs=step1
                ).then(
                    fn=lambda: gr.update(interactive=True),
                    outputs=step2
                )

            # ===== TAB 2: LEARNING STYLE QUIZ =====
            with gr.Tab("Learning Style Quiz", id=1):
                with gr.Column():
                    gr.Markdown("### Step 2: Discover Your Learning Style")
                    progress = gr.HTML("<div class='progress-bar' style='width: 0%'></div>")
                    quiz_components = []
                    with gr.Accordion("Quiz Questions", open=True):
                        for i, (question, options) in enumerate(zip(learning_style_quiz.questions, learning_style_quiz.options)):
                            with gr.Group(elem_classes="quiz-question"):
                                q = gr.Radio(
                                    options,
                                    label=f"{i+1}. {question}",
                                    show_label=True
                                )
                                quiz_components.append(q)
                    with gr.Row():
                        quiz_submit = gr.Button("Submit Quiz", variant="primary")
                        quiz_clear = gr.Button("Clear Answers")
                    quiz_alert = gr.HTML(visible=False)
                    learning_output = gr.Markdown(
                        label="Your Learning Style Results",
                        visible=False,
                        elem_classes="quiz-results"
                    )

                def render_progress(*answers):
                    """Render the progress bar as the share of answered questions."""
                    # Guard the empty-quiz case so the division cannot fail.
                    percent = sum(1 for a in answers if a) / len(answers) * 100 if answers else 0
                    return gr.HTML(
                        f"<div class='progress-bar' style='width: {percent}%'></div>"
                    )

                for component in quiz_components:
                    component.change(
                        fn=render_progress,
                        inputs=quiz_components,
                        outputs=progress
                    )
                quiz_submit.click(
                    fn=lambda *answers: learning_style_quiz.evaluate_quiz(*answers),
                    inputs=quiz_components,
                    outputs=learning_output
                ).then(
                    fn=lambda: gr.update(visible=True),
                    outputs=learning_output
                ).then(
                    fn=mark_completed(1),
                    inputs=tab_completed,
                    outputs=tab_completed
                ).then(
                    fn=lambda: gr.update(elem_classes="completed-tab"),
                    outputs=step2
                ).then(
                    fn=lambda: gr.update(interactive=True),
                    outputs=step3
                )
                quiz_clear.click(
                    fn=lambda: [None] * len(quiz_components),
                    outputs=quiz_components
                ).then(
                    fn=lambda: gr.HTML("<div class='progress-bar' style='width: 0%'></div>"),
                    outputs=progress
                )

            # ===== TAB 3: PERSONAL QUESTIONS =====
            with gr.Tab("Personal Profile", id=2):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Step 3: Tell Us About Yourself")
                        with gr.Group():
                            name = gr.Textbox(label="Full Name", placeholder="Your name")
                            age = gr.Number(label="Age", minimum=MIN_AGE, maximum=MAX_AGE, precision=0)
                            interests = gr.Textbox(
                                label="Your Interests/Hobbies",
                                placeholder="e.g., Science, Music, Sports, Art..."
                            )
                        save_personal_btn = gr.Button("Save Information", variant="primary")
                        save_confirmation = gr.HTML(visible=False)
                    with gr.Column(scale=1):
                        gr.Markdown("### Favorites")
                        with gr.Group():
                            movie = gr.Textbox(label="Favorite Movie")
                            movie_reason = gr.Textbox(label="Why do you like it?", lines=2)
                            show = gr.Textbox(label="Favorite TV Show")
                            show_reason = gr.Textbox(label="Why do you like it?", lines=2)
                            book = gr.Textbox(label="Favorite Book")
                            book_reason = gr.Textbox(label="Why do you like it?", lines=2)
                            character = gr.Textbox(label="Favorite Character (from any story)")
                            character_reason = gr.Textbox(label="Why do you like them?", lines=2)
                        with gr.Accordion("Personal Blog (Optional)", open=False):
                            blog = gr.Textbox(
                                label="Share your thoughts",
                                placeholder="Write something about yourself...",
                                lines=5
                            )

                def save_personal_info(n, a, i, status):
                    """Flag step 3 done (keeping other flags) and unlock step 4."""
                    return (
                        mark_completed(2)(status),
                        gr.update(elem_classes="completed-tab"),
                        gr.update(interactive=True),
                        gr.update(value="<div class='alert-box'>Information saved!</div>", visible=True)
                    )

                save_personal_btn.click(
                    fn=save_personal_info,
                    inputs=[name, age, interests, tab_completed],
                    outputs=[tab_completed, step3, step4, save_confirmation]
                )

            # ===== TAB 4: SAVE & REVIEW =====
            with gr.Tab("Save Profile", id=3):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Step 4: Review & Save Your Profile")
                        with gr.Group():
                            load_profile_dropdown = gr.Dropdown(
                                label="Load Existing Profile",
                                choices=profile_manager.list_profiles(session_token.value),
                                visible=False
                            )
                            with gr.Row():
                                load_btn = gr.Button("Load", visible=False)
                                delete_btn = gr.Button("Delete", variant="stop", visible=False)
                        # NOTE(review): whether Save/Clear sit inside the row above
                        # cannot be recovered from the flattened source - confirm layout.
                        save_btn = gr.Button("Save Profile", variant="primary")
                        clear_btn = gr.Button("Clear Form")
                    with gr.Column(scale=2):
                        output_summary = gr.Markdown(
                            "Your profile summary will appear here after saving.",
                            label="Profile Summary"
                        )

                def refresh_saved_profiles():
                    """Refresh the dropdown choices and reveal the load/delete controls."""
                    names = profile_manager.list_profiles(session_token.value)
                    has_profiles = bool(names)
                    return (
                        # Update `choices`; returning the bare list would set the value.
                        gr.update(choices=names, visible=has_profiles),
                        gr.update(visible=has_profiles),
                        gr.update(visible=has_profiles),
                    )

                save_btn.click(
                    fn=profile_manager.save_profile,
                    inputs=[
                        name, age, interests, transcript_data, learning_output,
                        movie, movie_reason, show, show_reason,
                        book, book_reason, character, character_reason, blog
                    ],
                    outputs=output_summary
                ).then(
                    fn=mark_completed(3),
                    inputs=tab_completed,
                    outputs=tab_completed
                ).then(
                    fn=lambda: gr.update(elem_classes="completed-tab"),
                    outputs=step4
                ).then(
                    fn=lambda: gr.update(interactive=True),
                    outputs=step5
                ).then(
                    fn=refresh_saved_profiles,
                    outputs=[load_profile_dropdown, load_btn, delete_btn]
                )

            # ===== TAB 5: AI ASSISTANT =====
            with gr.Tab("AI Assistant", id=4):
                gr.Markdown("## Your Personalized Learning Assistant")
                gr.Markdown("Ask me anything about studying, your courses, grades, or learning strategies.")

                async def chat_wrapper(message: str, history: List[List[str]]):
                    """Forward a chat turn to the module-level teaching assistant."""
                    response = await teaching_assistant.generate_response(
                        message,
                        history,
                        session_token.value
                    )
                    return response

                chatbot = gr.ChatInterface(
                    fn=chat_wrapper,
                    examples=[
                        "What's my GPA?",
                        "How should I study for math?",
                        "What courses am I taking?",
                        "Study tips for my learning style"
                    ],
                    title=""
                )

        # Navigation logic
        def navigate_to_tab(tab_index: int, tab_completed_status):
            """Select ``tab_index`` if every earlier step is complete.

            Otherwise select the first incomplete step and show why. The check
            relies only on the completion flags: ``tabs.selected`` holds the
            constructor argument (None here), not the tab currently shown, so it
            cannot be compared against ``tab_index``.

            Returns:
                A ``(tabs update, nav_message update)`` pair.
            """
            status = tab_completed_status or {}
            messages = [
                "Please complete the transcript analysis first",
                "Please complete the learning style quiz first",
                "Please fill out your personal information first",
                "Please save your profile first"
            ]
            # Check all previous tabs are completed
            for i in range(tab_index):
                if not status.get(i, False):
                    return (
                        gr.Tabs(selected=i),
                        gr.update(
                            value=f"<div class='error-message'>⛔ {messages[i]}</div>",
                            visible=True
                        )
                    )
            return gr.Tabs(selected=tab_index), gr.update(visible=False)

        # Each step button targets the tab with the same position.
        for idx, step_button in enumerate((step1, step2, step3, step4, step5)):
            step_button.click(
                fn=navigate_to_tab,
                inputs=[gr.State(idx), tab_completed],
                outputs=[tabs, nav_message]
            )

        # Dark mode toggle
        # NOTE(review): the returned theme is not bound to any output component,
        # so this handler presumably has no visible effect - confirm and replace
        # with a client-side toggle if dark mode is meant to work.
        def toggle_dark_mode(dark):
            return gr.themes.Soft(primary_hue="blue", secondary_hue="gray") if not dark else gr.themes.Soft(primary_hue="blue", secondary_hue="gray", neutral_hue="slate")

        dark_mode.change(
            fn=toggle_dark_mode,
            inputs=dark_mode,
            outputs=None
        )

        # Load model on startup
        app.load(fn=lambda: model_loader.load_model(), outputs=[])
    return app
# Build the Blocks app at import time so `app` is available as a module-level name.
app = create_interface()

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()