Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -41,8 +41,8 @@ logging.basicConfig(
|
|
41 |
filename='transcript_parser.log'
|
42 |
)
|
43 |
|
44 |
-
# Model configuration -
|
45 |
-
MODEL_NAME = "deepseek-ai/deepseek-llm-
|
46 |
|
47 |
# Initialize Hugging Face API
|
48 |
if HF_TOKEN:
|
@@ -52,14 +52,6 @@ if HF_TOKEN:
|
|
52 |
except Exception as e:
|
53 |
logging.error(f"Failed to initialize Hugging Face API: {str(e)}")
|
54 |
|
55 |
-
# ========== CACHING AND PERFORMANCE OPTIMIZATIONS ==========
|
56 |
-
executor = ThreadPoolExecutor(max_workers=4)
|
57 |
-
|
58 |
-
# Cache model loading
|
59 |
-
@lru_cache(maxsize=1)
|
60 |
-
def get_model_and_tokenizer():
|
61 |
-
return model_loader.load_model()
|
62 |
-
|
63 |
# ========== MODEL LOADER ==========
|
64 |
class ModelLoader:
|
65 |
def __init__(self):
|
@@ -76,7 +68,6 @@ class ModelLoader:
|
|
76 |
if progress:
|
77 |
progress(0.1, desc="Checking GPU availability...")
|
78 |
|
79 |
-
# Clear CUDA cache first
|
80 |
torch.cuda.empty_cache()
|
81 |
|
82 |
if progress:
|
@@ -90,13 +81,12 @@ class ModelLoader:
|
|
90 |
if progress:
|
91 |
progress(0.5, desc="Loading model (this may take a few minutes)...")
|
92 |
|
93 |
-
# More robust model loading
|
94 |
model_kwargs = {
|
95 |
"trust_remote_code": True,
|
96 |
"torch_dtype": torch.float16 if self.device == "cuda" else torch.float32,
|
97 |
"device_map": "auto" if self.device == "cuda" else None,
|
98 |
"low_cpu_mem_usage": True,
|
99 |
-
"offload_folder": "offload"
|
100 |
}
|
101 |
|
102 |
try:
|
@@ -105,7 +95,6 @@ class ModelLoader:
|
|
105 |
**model_kwargs
|
106 |
)
|
107 |
except torch.cuda.OutOfMemoryError:
|
108 |
-
# Fallback to CPU if GPU OOM
|
109 |
model_kwargs["device_map"] = None
|
110 |
model = AutoModelForCausalLM.from_pretrained(
|
111 |
MODEL_NAME,
|
@@ -113,7 +102,6 @@ class ModelLoader:
|
|
113 |
).to('cpu')
|
114 |
self.device = 'cpu'
|
115 |
|
116 |
-
# Verify model is responsive
|
117 |
test_input = tokenizer("Test", return_tensors="pt").to(self.device)
|
118 |
_ = model.generate(**test_input, max_new_tokens=1)
|
119 |
|
@@ -131,29 +119,27 @@ class ModelLoader:
|
|
131 |
# Initialize model loader
|
132 |
model_loader = ModelLoader()
|
133 |
|
|
|
|
|
|
|
|
|
134 |
# ========== UTILITY FUNCTIONS ==========
|
135 |
def generate_session_token() -> str:
|
136 |
-
"""Generate a random session token for user identification."""
|
137 |
alphabet = string.ascii_letters + string.digits
|
138 |
return ''.join(secrets.choice(alphabet) for _ in range(SESSION_TOKEN_LENGTH))
|
139 |
|
140 |
def sanitize_input(text: str) -> str:
|
141 |
-
"""Sanitize user input to prevent XSS and injection attacks."""
|
142 |
if not text:
|
143 |
return ""
|
144 |
-
# Basic HTML escaping and removal of potentially dangerous characters
|
145 |
text = html.escape(text.strip())
|
146 |
-
# Remove any remaining HTML tags
|
147 |
text = re.sub(r'<[^>]*>', '', text)
|
148 |
-
# Remove potentially dangerous characters
|
149 |
text = re.sub(r'[^\w\s\-.,!?@#\$%^&*()+=]', '', text)
|
150 |
return text
|
151 |
|
152 |
def validate_name(name: str) -> str:
|
153 |
-
"""Validate name input."""
|
154 |
name = name.strip()
|
155 |
if not name:
|
156 |
-
raise ValueError("Name cannot be empty.
|
157 |
if len(name) > 100:
|
158 |
raise ValueError("Name is too long (maximum 100 characters).")
|
159 |
if any(c.isdigit() for c in name):
|
@@ -161,7 +147,6 @@ def validate_name(name: str) -> str:
|
|
161 |
return name
|
162 |
|
163 |
def validate_age(age: Union[int, float, str]) -> int:
|
164 |
-
"""Validate and convert age input."""
|
165 |
try:
|
166 |
age_int = int(age)
|
167 |
if not MIN_AGE <= age_int <= MAX_AGE:
|
@@ -171,7 +156,6 @@ def validate_age(age: Union[int, float, str]) -> int:
|
|
171 |
raise ValueError("Please enter a valid age number.")
|
172 |
|
173 |
def validate_file(file_obj) -> None:
|
174 |
-
"""Validate uploaded file."""
|
175 |
if not file_obj:
|
176 |
raise ValueError("Please upload a file first")
|
177 |
|
@@ -179,24 +163,22 @@ def validate_file(file_obj) -> None:
|
|
179 |
if file_ext not in ALLOWED_FILE_TYPES:
|
180 |
raise ValueError(f"Invalid file type. Allowed types: {', '.join(ALLOWED_FILE_TYPES)}")
|
181 |
|
182 |
-
file_size = os.path.getsize(file_obj.name) / (1024 * 1024)
|
183 |
if file_size > MAX_FILE_SIZE_MB:
|
184 |
raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
|
185 |
|
186 |
# ========== TEXT EXTRACTION FUNCTIONS ==========
|
187 |
def extract_text_from_file(file_path: str, file_ext: str) -> str:
|
188 |
-
"""Enhanced text extraction with better error handling and fallbacks."""
|
189 |
text = ""
|
190 |
|
191 |
try:
|
192 |
if file_ext == '.pdf':
|
193 |
-
# First try PyMuPDF for text extraction
|
194 |
try:
|
195 |
doc = fitz.open(file_path)
|
196 |
for page in doc:
|
197 |
text += page.get_text("text") + '\n'
|
198 |
if not text.strip():
|
199 |
-
raise ValueError("PyMuPDF returned empty text
|
200 |
except Exception as e:
|
201 |
logging.warning(f"PyMuPDF failed: {str(e)}. Trying OCR fallback...")
|
202 |
text = extract_text_from_pdf_with_ocr(file_path)
|
@@ -204,56 +186,44 @@ def extract_text_from_file(file_path: str, file_ext: str) -> str:
|
|
204 |
elif file_ext in ['.png', '.jpg', '.jpeg']:
|
205 |
text = extract_text_with_ocr(file_path)
|
206 |
|
207 |
-
# Clean up the extracted text
|
208 |
text = clean_extracted_text(text)
|
209 |
|
210 |
if not text.strip():
|
211 |
-
raise ValueError("No text could be extracted.
|
212 |
|
213 |
return text
|
214 |
|
215 |
except Exception as e:
|
216 |
logging.error(f"Text extraction error: {str(e)}")
|
217 |
-
raise gr.Error(f"Failed to extract text: {str(e)}
|
218 |
|
219 |
def extract_text_from_pdf_with_ocr(file_path: str) -> str:
|
220 |
-
"""Fallback PDF text extraction using OCR."""
|
221 |
text = ""
|
222 |
try:
|
223 |
doc = fitz.open(file_path)
|
224 |
for page in doc:
|
225 |
pix = page.get_pixmap()
|
226 |
img = Image.open(io.BytesIO(pix.tobytes()))
|
227 |
-
|
228 |
-
img = img.
|
229 |
-
img = img.point(lambda x: 0 if x < 128 else 255) # Binarize
|
230 |
text += pytesseract.image_to_string(img, config='--psm 6 --oem 3') + '\n'
|
231 |
except Exception as e:
|
232 |
-
raise ValueError(f"PDF OCR failed: {str(e)}
|
233 |
return text
|
234 |
|
235 |
def extract_text_with_ocr(file_path: str) -> str:
|
236 |
-
"""Extract text from image files using OCR with preprocessing."""
|
237 |
try:
|
238 |
image = Image.open(file_path)
|
239 |
-
|
240 |
-
|
241 |
-
image = image.convert('L') # Convert to grayscale
|
242 |
-
image = image.point(lambda x: 0 if x < 128 else 255, '1') # Thresholding
|
243 |
-
|
244 |
-
# Custom Tesseract configuration
|
245 |
custom_config = r'--oem 3 --psm 6'
|
246 |
text = pytesseract.image_to_string(image, config=custom_config)
|
247 |
return text
|
248 |
except Exception as e:
|
249 |
-
raise ValueError(f"OCR processing failed: {str(e)}
|
250 |
|
251 |
def clean_extracted_text(text: str) -> str:
|
252 |
-
"""Clean and normalize the extracted text."""
|
253 |
-
# Remove multiple spaces and newlines
|
254 |
text = re.sub(r'\s+', ' ', text).strip()
|
255 |
-
|
256 |
-
# Fix common OCR errors
|
257 |
replacements = {
|
258 |
'|': 'I',
|
259 |
'‘': "'",
|
@@ -263,38 +233,16 @@ def clean_extracted_text(text: str) -> str:
|
|
263 |
'fi': 'fi',
|
264 |
'fl': 'fl'
|
265 |
}
|
266 |
-
|
267 |
for wrong, right in replacements.items():
|
268 |
text = text.replace(wrong, right)
|
269 |
-
|
270 |
return text
|
271 |
|
272 |
def remove_sensitive_info(text: str) -> str:
|
273 |
-
"""Remove potentially sensitive information from transcript text."""
|
274 |
-
# Remove social security numbers
|
275 |
text = re.sub(r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED]', text)
|
276 |
-
# Remove student IDs (assuming 6-9 digit numbers)
|
277 |
text = re.sub(r'\b\d{6,9}\b', '[ID]', text)
|
278 |
-
# Remove email addresses
|
279 |
text = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z9.-]+\.[A-Z|a-z]{2,}\b', '[EMAIL]', text)
|
280 |
return text
|
281 |
|
282 |
-
def validate_parsed_data(data: Dict) -> bool:
|
283 |
-
"""Validate the structure of parsed transcript data"""
|
284 |
-
required_student_fields = ['name', 'current_grade']
|
285 |
-
required_course_fields = ['description', 'credits']
|
286 |
-
|
287 |
-
if 'student_info' not in data:
|
288 |
-
return False
|
289 |
-
if not all(field in data['student_info'] for field in required_student_fields):
|
290 |
-
return False
|
291 |
-
if 'course_history' not in data or not isinstance(data['course_history'], list):
|
292 |
-
return False
|
293 |
-
if len(data['course_history']) > 0:
|
294 |
-
if not all(field in data['course_history'][0] for field in required_course_fields):
|
295 |
-
return False
|
296 |
-
return True
|
297 |
-
|
298 |
# ========== TRANSCRIPT PARSING ==========
|
299 |
class TranscriptParser:
|
300 |
def __init__(self):
|
@@ -305,353 +253,45 @@ class TranscriptParser:
|
|
305 |
self.graduation_status = {}
|
306 |
|
307 |
def parse_transcript(self, text: str) -> Dict:
|
308 |
-
"""
|
309 |
try:
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
# Fall back to AI parsing if not Miami-Dade format
|
315 |
-
return parse_transcript_with_ai_fallback(text)
|
316 |
-
|
317 |
-
except Exception as e:
|
318 |
-
logging.error(f"Error parsing transcript: {str(e)}")
|
319 |
-
raise ValueError(f"Couldn't parse transcript: {str(e)}")
|
320 |
-
|
321 |
-
def _parse_miami_dade_format(self, text: str, strict_mode: bool = False) -> Dict:
|
322 |
-
"""Parse Miami-Dade County Public Schools transcripts."""
|
323 |
-
# Initialize PDF reader from text (simulating the PDF structure)
|
324 |
-
lines = [line.strip() for line in text.split('\n') if line.strip()]
|
325 |
-
|
326 |
-
# Initialize data structure
|
327 |
-
data = {
|
328 |
-
'student_info': {},
|
329 |
-
'graduation_requirements': [],
|
330 |
-
'course_history': [],
|
331 |
-
'summary': {},
|
332 |
-
'format': 'miami_dade' # Add format identifier
|
333 |
-
}
|
334 |
-
|
335 |
-
# Parse student information
|
336 |
-
student_info_found = False
|
337 |
-
for i, line in enumerate(lines):
|
338 |
-
if "DORAL ACADEMY HIGH SCHOOL" in line:
|
339 |
-
# School info line
|
340 |
-
school_info = line.split('|')
|
341 |
-
if len(school_info) > 1:
|
342 |
-
data['student_info']['school'] = school_info[1].strip()
|
343 |
-
data['student_info']['district'] = school_info[2].strip() if len(school_info) > 2 else ''
|
344 |
-
|
345 |
-
# Student ID and name line
|
346 |
-
if i+1 < len(lines):
|
347 |
-
student_line = lines[i+1].split('-')
|
348 |
-
if len(student_line) > 1:
|
349 |
-
name_parts = student_line[1].split(',')
|
350 |
-
if len(name_parts) > 1:
|
351 |
-
data['student_info']['student_id'] = student_line[0].strip()
|
352 |
-
data['student_info']['student_name'] = name_parts[1].strip() + " " + name_parts[0].strip()
|
353 |
-
|
354 |
-
# Academic info line
|
355 |
-
if i+2 < len(lines):
|
356 |
-
academic_info = lines[i+2].split('|')
|
357 |
-
if len(academic_info) > 1:
|
358 |
-
data['student_info']['current_grade'] = academic_info[1].split(':')[1].strip() if ':' in academic_info[1] else academic_info[1].strip()
|
359 |
-
if len(academic_info) > 2:
|
360 |
-
data['student_info']['graduation_year'] = academic_info[2].strip()
|
361 |
-
if len(academic_info) > 3:
|
362 |
-
gpa_part = academic_info[3].strip()
|
363 |
-
if 'Weighted GPA' in gpa_part:
|
364 |
-
data['student_info']['weighted_gpa'] = gpa_part.split(':')[1].strip() if ':' in gpa_part else ''
|
365 |
-
elif 'Un-weighted GPA' in gpa_part:
|
366 |
-
data['student_info']['unweighted_gpa'] = gpa_part.split(':')[1].strip() if ':' in gpa_part else ''
|
367 |
-
if len(academic_info) > 4:
|
368 |
-
data['student_info']['community_service_date'] = academic_info[4].split(':')[1].strip() if ':' in academic_info[4] else ''
|
369 |
-
if len(academic_info) > 5:
|
370 |
-
data['student_info']['total_credits_earned'] = academic_info[5].split(':')[1].strip() if ':' in academic_info[5] else ''
|
371 |
-
|
372 |
-
student_info_found = True
|
373 |
-
break
|
374 |
-
|
375 |
-
if not student_info_found and strict_mode:
|
376 |
-
raise ValueError("Could not find student information section")
|
377 |
-
|
378 |
-
# Parse graduation requirements
|
379 |
-
requirements_start = None
|
380 |
-
requirements_end = None
|
381 |
-
for i, line in enumerate(lines):
|
382 |
-
if "Code" in line and "Description" in line and "Required" in line:
|
383 |
-
requirements_start = i + 1
|
384 |
-
if requirements_start and "Total" in line:
|
385 |
-
requirements_end = i
|
386 |
-
break
|
387 |
-
|
388 |
-
if requirements_start and requirements_end:
|
389 |
-
for line in lines[requirements_start:requirements_end]:
|
390 |
-
if '|' in line:
|
391 |
-
parts = [p.strip() for p in line.split('|') if p.strip()]
|
392 |
-
if len(parts) >= 6:
|
393 |
-
req = {
|
394 |
-
'code': parts[0],
|
395 |
-
'description': parts[1],
|
396 |
-
'required': parts[2],
|
397 |
-
'waived': parts[3],
|
398 |
-
'completed': parts[4],
|
399 |
-
'status': parts[5]
|
400 |
-
}
|
401 |
-
data['graduation_requirements'].append(req)
|
402 |
|
403 |
-
#
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
if len(total_parts) >= 5:
|
408 |
-
data['summary']['total_required'] = total_parts[1]
|
409 |
-
data['summary']['total_waived'] = total_parts[2]
|
410 |
-
data['summary']['total_completed'] = total_parts[3]
|
411 |
-
data['summary']['completion_percentage'] = total_parts[4]
|
412 |
-
|
413 |
-
# Parse course history
|
414 |
-
course_history_start = None
|
415 |
-
for i, line in enumerate(lines):
|
416 |
-
if "Requirement" in line and "School Year" in line and "GradeLv1" in line:
|
417 |
-
course_history_start = i + 1
|
418 |
-
break
|
419 |
-
|
420 |
-
if course_history_start:
|
421 |
-
current_requirement = None
|
422 |
-
for line in lines[course_history_start:]:
|
423 |
-
if '|' in line:
|
424 |
-
parts = [p.strip() for p in line.split('|') if p.strip()]
|
425 |
-
|
426 |
-
# Check if this is a new requirement line
|
427 |
-
if len(parts) >= 2 and parts[0] and parts[0] in [req['code'] for req in data['graduation_requirements']]:
|
428 |
-
current_requirement = parts[0]
|
429 |
-
parts = parts[1:] # Remove the requirement code
|
430 |
-
|
431 |
-
if len(parts) >= 9:
|
432 |
-
course = {
|
433 |
-
'requirement': current_requirement,
|
434 |
-
'school_year': parts[0],
|
435 |
-
'grade_level': parts[1],
|
436 |
-
'course_number': parts[2],
|
437 |
-
'description': parts[3],
|
438 |
-
'term': parts[4],
|
439 |
-
'district_number': parts[5],
|
440 |
-
'fg': parts[6],
|
441 |
-
'included': parts[7],
|
442 |
-
'credits': parts[8]
|
443 |
-
}
|
444 |
-
data['course_history'].append(course)
|
445 |
-
|
446 |
-
# Calculate graduation status
|
447 |
-
try:
|
448 |
-
if data['summary'].get('total_required') and data['summary'].get('total_completed'):
|
449 |
-
graduation_status = {
|
450 |
-
'total_required_credits': float(data['summary']['total_required']),
|
451 |
-
'total_completed_credits': float(data['summary']['total_completed']),
|
452 |
-
'percent_complete': float(data['summary']['completion_percentage'].replace('%', '')),
|
453 |
-
'remaining_credits': float(data['summary']['total_required']) - float(data['summary']['total_completed']),
|
454 |
-
'on_track': float(data['summary']['completion_percentage'].replace('%', '')) >= 75.0
|
455 |
-
}
|
456 |
-
data['graduation_status'] = graduation_status
|
457 |
-
except Exception as e:
|
458 |
-
if strict_mode:
|
459 |
-
raise ValueError(f"Error calculating graduation status: {str(e)}")
|
460 |
-
|
461 |
-
return data
|
462 |
-
|
463 |
-
def format_transcript_output(data: Dict) -> str:
|
464 |
-
"""Enhanced formatting for transcript output with format awareness"""
|
465 |
-
output = []
|
466 |
-
|
467 |
-
# Student Info Section
|
468 |
-
student = data.get("student_info", {})
|
469 |
-
output.append(f"## Student Transcript Summary\n{'='*50}")
|
470 |
-
output.append(f"**Name:** {student.get('name', 'Unknown')}")
|
471 |
-
output.append(f"**Student ID:** {student.get('id', 'Unknown')}")
|
472 |
-
output.append(f"**Current Grade:** {student.get('current_grade', 'Unknown')}")
|
473 |
-
output.append(f"**Graduation Year:** {student.get('graduation_year', 'Unknown')}")
|
474 |
-
|
475 |
-
if 'unweighted_gpa' in student and 'weighted_gpa' in student:
|
476 |
-
output.append(f"**Unweighted GPA:** {student['unweighted_gpa']}")
|
477 |
-
output.append(f"**Weighted GPA:** {student['weighted_gpa']}")
|
478 |
-
elif 'gpa' in student:
|
479 |
-
output.append(f"**GPA:** {student['gpa']}")
|
480 |
-
|
481 |
-
if 'total_credits' in student:
|
482 |
-
output.append(f"**Total Credits Earned:** {student['total_credits']}")
|
483 |
-
if 'community_service_hours' in student:
|
484 |
-
output.append(f"**Community Service Hours:** {student['community_service_hours']}")
|
485 |
-
|
486 |
-
output.append("")
|
487 |
-
|
488 |
-
# Graduation Requirements Section (for Miami-Dade format)
|
489 |
-
if data.get('format') == 'miami_dade':
|
490 |
-
grad_status = data.get("graduation_status", {})
|
491 |
-
output.append(f"## Graduation Progress\n{'='*50}")
|
492 |
-
output.append(f"**Overall Completion:** {grad_status.get('percent_complete', 0)}%")
|
493 |
-
output.append(f"**Credits Required:** {grad_status.get('total_required_credits', 0)}")
|
494 |
-
output.append(f"**Credits Completed:** {grad_status.get('total_completed_credits', 0)}")
|
495 |
-
output.append(f"**Credits Remaining:** {grad_status.get('remaining_credits', 0)}")
|
496 |
-
output.append(f"**On Track to Graduate:** {'Yes' if grad_status.get('on_track', False) else 'No'}\n")
|
497 |
-
|
498 |
-
# Detailed Requirements
|
499 |
-
output.append("### Detailed Requirements:")
|
500 |
-
for req in data.get("graduation_requirements", []):
|
501 |
-
output.append(
|
502 |
-
f"- **{req['code']}**: {req['description']}\n"
|
503 |
-
f" Required: {req['required']} | Completed: {req['completed']} | "
|
504 |
-
f"Status: {req['status']}"
|
505 |
-
)
|
506 |
-
output.append("")
|
507 |
-
|
508 |
-
# Current Courses
|
509 |
-
if any(c.get('credits', '') == 'inProgress' for c in data.get("course_history", [])):
|
510 |
-
output.append("## Current Courses (In Progress)\n" + '='*50)
|
511 |
-
for course in data["course_history"]:
|
512 |
-
if course.get('credits', '') == 'inProgress':
|
513 |
-
output.append(
|
514 |
-
f"- **{course['course_number']} {course['description']}**\n"
|
515 |
-
f" Category: {course['requirement']} | "
|
516 |
-
f"Grade Level: {course['grade_level']} | "
|
517 |
-
f"Term: {course['term']} | Credits: {course['credits']}"
|
518 |
-
)
|
519 |
-
output.append("")
|
520 |
-
|
521 |
-
# Course History by Year
|
522 |
-
courses_by_year = defaultdict(list)
|
523 |
-
for course in data.get("course_history", []):
|
524 |
-
if course.get("school_year"):
|
525 |
-
courses_by_year[course["school_year"]].append(course)
|
526 |
-
|
527 |
-
if courses_by_year:
|
528 |
-
output.append("## Course History\n" + '='*50)
|
529 |
-
for year in sorted(courses_by_year.keys()):
|
530 |
-
output.append(f"\n### {year}")
|
531 |
-
for course in courses_by_year[year]:
|
532 |
-
output.append(
|
533 |
-
f"- **{course.get('course_number', '')} {course.get('description', 'Unnamed course')}**\n"
|
534 |
-
f" Subject: {course.get('requirement', 'N/A')} | "
|
535 |
-
f"Grade: {course.get('fg', 'N/A')} | "
|
536 |
-
f"Credits: {course.get('credits', 'N/A')}"
|
537 |
-
)
|
538 |
-
|
539 |
-
return '\n'.join(output)
|
540 |
-
|
541 |
-
def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict:
|
542 |
-
"""More robust AI parsing with better error handling"""
|
543 |
-
try:
|
544 |
-
text = remove_sensitive_info(text[:20000]) # Increased limit
|
545 |
-
|
546 |
-
# Improved prompt with examples
|
547 |
-
prompt = f"""Extract academic transcript data as JSON. Follow this structure:
|
548 |
-
|
549 |
-
Example Input:
|
550 |
-
Student ID: 1234567 Name: DOE, JOHN Current Grade: 12 YOG: 2024
|
551 |
-
Unweighted GPA: 3.5 Weighted GPA: 4.2 Total Credits: 24.5
|
552 |
-
|
553 |
-
Example Output:
|
554 |
-
{{
|
555 |
-
"student_info": {{
|
556 |
-
"name": "John Doe",
|
557 |
-
"id": "1234567",
|
558 |
-
"current_grade": "12",
|
559 |
-
"graduation_year": "2024",
|
560 |
-
"unweighted_gpa": 3.5,
|
561 |
-
"weighted_gpa": 4.2,
|
562 |
-
"total_credits": 24.5
|
563 |
-
}},
|
564 |
-
"course_history": [
|
565 |
-
{{
|
566 |
-
"course_code": "MATH101",
|
567 |
-
"description": "Algebra I",
|
568 |
-
"grade": "A",
|
569 |
-
"credits": 1.0,
|
570 |
-
"school_year": "2022-2023"
|
571 |
-
}}
|
572 |
-
]
|
573 |
-
}}
|
574 |
-
|
575 |
-
Actual Transcript:
|
576 |
-
{text}
|
577 |
-
"""
|
578 |
-
|
579 |
-
if progress:
|
580 |
-
progress(0.3, desc="Processing with AI...")
|
581 |
-
|
582 |
-
model, tokenizer = get_model_and_tokenizer()
|
583 |
-
if model is None:
|
584 |
-
raise ValueError("Model not loaded")
|
585 |
|
586 |
-
|
587 |
-
|
588 |
-
|
589 |
-
**inputs,
|
590 |
-
max_new_tokens=2500,
|
591 |
-
temperature=0.3, # Lower for more consistent results
|
592 |
-
do_sample=True,
|
593 |
-
top_p=0.9,
|
594 |
-
repetition_penalty=1.2
|
595 |
-
)
|
596 |
-
|
597 |
-
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
598 |
-
|
599 |
-
# More robust JSON extraction
|
600 |
-
try:
|
601 |
-
if '```json' in response:
|
602 |
-
json_str = response.split('```json')[1].split('```')[0].strip()
|
603 |
-
else:
|
604 |
-
json_str = response.split('{', 1)[1].rsplit('}', 1)[0]
|
605 |
-
json_str = '{' + json_str + '}'
|
606 |
-
|
607 |
-
parsed_data = json.loads(json_str)
|
608 |
|
609 |
-
|
610 |
-
if
|
611 |
-
|
612 |
-
|
613 |
-
return parsed_data
|
614 |
|
615 |
-
|
616 |
-
|
617 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
618 |
|
619 |
-
|
620 |
-
logging.error(f"AI parsing error: {str(e)}")
|
621 |
-
raise gr.Error(f"Failed to parse transcript: {str(e)}")
|
622 |
-
|
623 |
-
def parse_transcript_with_ai(text: str, progress=gr.Progress()) -> Dict:
|
624 |
-
"""Enhanced AI parsing with fallback to structured parsing"""
|
625 |
-
try:
|
626 |
-
# First try structured parsing
|
627 |
-
if progress:
|
628 |
-
progress(0.1, desc="Attempting structured parsing...")
|
629 |
-
|
630 |
-
parser = TranscriptParser()
|
631 |
-
parsed_data = parser.parse_transcript(text)
|
632 |
-
|
633 |
-
# Validate the parsed data
|
634 |
-
if not validate_parsed_data(parsed_data):
|
635 |
-
raise ValueError("Structured parsing returned incomplete data")
|
636 |
|
637 |
-
|
638 |
-
|
639 |
-
|
640 |
-
return parsed_data
|
641 |
-
|
642 |
-
except Exception as e:
|
643 |
-
logging.warning(f"Structured parsing failed, falling back to AI: {str(e)}")
|
644 |
-
|
645 |
-
# Fall back to AI parsing if structured parsing fails
|
646 |
-
return parse_transcript_with_ai_fallback(text, progress)
|
647 |
-
|
648 |
-
async def parse_transcript_async(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
|
649 |
-
"""Async wrapper for transcript parsing"""
|
650 |
-
loop = asyncio.get_event_loop()
|
651 |
-
return await loop.run_in_executor(executor, parse_transcript, file_obj, progress)
|
652 |
|
653 |
def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
|
654 |
-
"""
|
655 |
try:
|
656 |
if not file_obj:
|
657 |
raise ValueError("Please upload a file first")
|
@@ -659,46 +299,29 @@ def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Di
|
|
659 |
validate_file(file_obj)
|
660 |
file_ext = os.path.splitext(file_obj.name)[1].lower()
|
661 |
|
662 |
-
# Extract text from file with better error reporting
|
663 |
if progress:
|
664 |
progress(0.2, desc="Extracting text from file...")
|
665 |
|
666 |
text = extract_text_from_file(file_obj.name, file_ext)
|
667 |
|
668 |
if not text.strip():
|
669 |
-
raise ValueError("No text could be extracted from the file.
|
670 |
|
671 |
-
# Try structured parsing first
|
672 |
if progress:
|
673 |
-
progress(0.
|
674 |
|
675 |
parser = TranscriptParser()
|
676 |
-
|
677 |
-
parsed_data = parser.parse_transcript(text)
|
678 |
-
if validate_parsed_data(parsed_data):
|
679 |
-
if progress:
|
680 |
-
progress(0.9, desc="Formatting results...")
|
681 |
-
return format_transcript_output(parsed_data), parsed_data
|
682 |
-
except Exception as e:
|
683 |
-
logging.warning(f"Structured parsing failed: {str(e)}")
|
684 |
|
685 |
-
#
|
686 |
-
|
687 |
-
|
688 |
-
|
689 |
-
|
690 |
-
return
|
691 |
|
692 |
except Exception as e:
|
693 |
error_msg = f"Error processing transcript: {str(e)}"
|
694 |
-
# Add specific troubleshooting tips
|
695 |
-
if "PDF" in str(e):
|
696 |
-
error_msg += "\n\nTIPS:\n1. Try converting to image (screenshot)\n2. Ensure text is selectable in PDF\n3. Try a different PDF reader"
|
697 |
-
elif "image" in str(e).lower():
|
698 |
-
error_msg += "\n\nTIPS:\n1. Use high contrast images\n2. Crop to just the transcript\n3. Ensure good lighting"
|
699 |
-
elif "AI" in str(e):
|
700 |
-
error_msg += "\n\nTIPS:\n1. Try a smaller section of the transcript\n2. Check for sensitive info that may be redacted\n3. Try again later"
|
701 |
-
|
702 |
logging.error(error_msg)
|
703 |
return error_msg, None
|
704 |
|
@@ -811,8 +434,8 @@ class LearningStyleQuiz:
|
|
811 |
}
|
812 |
|
813 |
def evaluate_quiz(self, *answers) -> str:
|
814 |
-
"""Evaluate quiz answers and
|
815 |
-
answers = list(answers)
|
816 |
if len(answers) != len(self.questions):
|
817 |
raise gr.Error("Please answer all questions before submitting")
|
818 |
|
@@ -820,7 +443,7 @@ class LearningStyleQuiz:
|
|
820 |
|
821 |
for i, answer in enumerate(answers):
|
822 |
if not answer:
|
823 |
-
continue
|
824 |
|
825 |
for j, style in enumerate(self.learning_styles):
|
826 |
if answer == self.options[i][j]:
|
@@ -834,7 +457,6 @@ class LearningStyleQuiz:
|
|
834 |
percentages = {style: (score/total_answered)*100 for style, score in scores.items()}
|
835 |
sorted_styles = sorted(scores.items(), key=lambda x: x[1], reverse=True)
|
836 |
|
837 |
-
# Generate enhanced results report
|
838 |
result = "## Your Learning Style Results\n\n"
|
839 |
result += "### Scores:\n"
|
840 |
for style, score in sorted_styles:
|
@@ -860,7 +482,6 @@ class LearningStyleQuiz:
|
|
860 |
for career in style_info['careers'][:6]:
|
861 |
result += f"- {career}\n"
|
862 |
|
863 |
-
# Add complementary strategies
|
864 |
complementary = [s for s in sorted_styles if s[0] != primary_style][0][0]
|
865 |
result += f"\nYou might also benefit from some **{complementary}** strategies:\n"
|
866 |
for tip in self.learning_styles[complementary]['tips'][:3]:
|
@@ -883,7 +504,6 @@ class LearningStyleQuiz:
|
|
883 |
|
884 |
return result
|
885 |
|
886 |
-
# Initialize quiz instance
|
887 |
learning_style_quiz = LearningStyleQuiz()
|
888 |
|
889 |
# ========== PROFILE MANAGEMENT ==========
|
@@ -894,13 +514,10 @@ class ProfileManager:
|
|
894 |
self.current_session = None
|
895 |
|
896 |
def set_session(self, session_token: str) -> None:
|
897 |
-
"""Set the current session token."""
|
898 |
self.current_session = session_token
|
899 |
|
900 |
def get_profile_path(self, name: str) -> Path:
|
901 |
-
"""Get profile path with session token if available."""
|
902 |
if self.current_session:
|
903 |
-
# Hash the name for security
|
904 |
name_hash = hashlib.sha256(name.encode()).hexdigest()[:16]
|
905 |
return self.profiles_dir / f"{name_hash}_{self.current_session}_profile.json"
|
906 |
return self.profiles_dir / f"{name.replace(' ', '_')}_profile.json"
|
@@ -910,22 +527,9 @@ class ProfileManager:
|
|
910 |
movie: str, movie_reason: str, show: str, show_reason: str,
|
911 |
book: str, book_reason: str, character: str, character_reason: str,
|
912 |
blog: str) -> str:
|
913 |
-
"""Save student profile with better validation messages"""
|
914 |
try:
|
915 |
-
|
916 |
-
|
917 |
-
raise ValueError("Name cannot be empty. Please enter your full name.")
|
918 |
-
if len(name) > 100:
|
919 |
-
raise ValueError("Name is too long (maximum 100 characters).")
|
920 |
-
if any(c.isdigit() for c in name):
|
921 |
-
raise ValueError("Name cannot contain numbers.")
|
922 |
-
|
923 |
-
try:
|
924 |
-
age_int = int(age)
|
925 |
-
if not MIN_AGE <= age_int <= MAX_AGE:
|
926 |
-
raise ValueError(f"Age must be between {MIN_AGE} and {MAX_AGE}.")
|
927 |
-
except (ValueError, TypeError):
|
928 |
-
raise ValueError("Please enter a valid age number.")
|
929 |
|
930 |
if not interests.strip():
|
931 |
raise ValueError("Please describe at least one interest or hobby.")
|
@@ -933,11 +537,9 @@ class ProfileManager:
|
|
933 |
if not transcript:
|
934 |
raise ValueError("Please complete the transcript analysis first.")
|
935 |
|
936 |
-
# Validate learning style quiz completion
|
937 |
if not learning_style or "Your primary learning style is:" not in learning_style:
|
938 |
raise ValueError("Please complete the learning style quiz first.")
|
939 |
|
940 |
-
# Prepare favorites data
|
941 |
favorites = {
|
942 |
"movie": sanitize_input(movie),
|
943 |
"movie_reason": sanitize_input(movie_reason),
|
@@ -949,26 +551,23 @@ class ProfileManager:
|
|
949 |
"character_reason": sanitize_input(character_reason)
|
950 |
}
|
951 |
|
952 |
-
# Prepare full profile data
|
953 |
data = {
|
954 |
"name": name,
|
955 |
-
"age":
|
956 |
"interests": sanitize_input(interests),
|
957 |
-
"transcript": transcript
|
958 |
-
"learning_style": learning_style
|
959 |
"favorites": favorites,
|
960 |
"blog": sanitize_input(blog) if blog else "",
|
961 |
"session_token": self.current_session,
|
962 |
"last_updated": time.time()
|
963 |
}
|
964 |
|
965 |
-
# Save to JSON file
|
966 |
filepath = self.get_profile_path(name)
|
967 |
|
968 |
with open(filepath, "w", encoding='utf-8') as f:
|
969 |
json.dump(data, f, indent=2, ensure_ascii=False)
|
970 |
|
971 |
-
# Upload to HF Hub if token is available
|
972 |
if HF_TOKEN and 'hf_api' in globals():
|
973 |
try:
|
974 |
hf_api.upload_file(
|
@@ -980,14 +579,17 @@ class ProfileManager:
|
|
980 |
except Exception as e:
|
981 |
logging.error(f"Failed to upload to HF Hub: {str(e)}")
|
982 |
|
983 |
-
|
|
|
|
|
|
|
|
|
984 |
|
985 |
except Exception as e:
|
986 |
logging.error(f"Profile validation error: {str(e)}")
|
987 |
raise gr.Error(f"Couldn't save profile: {str(e)}")
|
988 |
-
|
989 |
def load_profile(self, name: str = None, session_token: str = None) -> Dict:
|
990 |
-
"""Load profile by name or return the first one found."""
|
991 |
try:
|
992 |
if session_token:
|
993 |
profile_pattern = f"*{session_token}_profile.json"
|
@@ -999,7 +601,6 @@ class ProfileManager:
|
|
999 |
return {}
|
1000 |
|
1001 |
if name:
|
1002 |
-
# Find profile by name (hashed)
|
1003 |
name_hash = hashlib.sha256(name.encode()).hexdigest()[:16]
|
1004 |
if session_token:
|
1005 |
profile_file = self.profiles_dir / f"{name_hash}_{session_token}_profile.json"
|
@@ -1007,7 +608,6 @@ class ProfileManager:
|
|
1007 |
profile_file = self.profiles_dir / f"{name_hash}_profile.json"
|
1008 |
|
1009 |
if not profile_file.exists():
|
1010 |
-
# Try loading from HF Hub
|
1011 |
if HF_TOKEN and 'hf_api' in globals():
|
1012 |
try:
|
1013 |
hf_api.download_file(
|
@@ -1021,12 +621,10 @@ class ProfileManager:
|
|
1021 |
else:
|
1022 |
raise gr.Error(f"No profile found for {name}")
|
1023 |
else:
|
1024 |
-
# Load the first profile found
|
1025 |
profile_file = profiles[0]
|
1026 |
|
1027 |
with open(profile_file, "r", encoding='utf-8') as f:
|
1028 |
profile_data = json.load(f)
|
1029 |
-
# Check session timeout
|
1030 |
if time.time() - profile_data.get('last_updated', 0) > SESSION_TIMEOUT:
|
1031 |
raise gr.Error("Session expired. Please start a new session.")
|
1032 |
return profile_data
|
@@ -1036,13 +634,11 @@ class ProfileManager:
|
|
1036 |
return {}
|
1037 |
|
1038 |
def list_profiles(self, session_token: str = None) -> List[str]:
|
1039 |
-
"""List all available profile names for the current session."""
|
1040 |
if session_token:
|
1041 |
profiles = list(self.profiles_dir.glob(f"*{session_token}_profile.json"))
|
1042 |
else:
|
1043 |
profiles = list(self.profiles_dir.glob("*.json"))
|
1044 |
|
1045 |
-
# Extract just the name part (without session token)
|
1046 |
profile_names = []
|
1047 |
for p in profiles:
|
1048 |
with open(p, "r", encoding='utf-8') as f:
|
@@ -1053,336 +649,54 @@ class ProfileManager:
|
|
1053 |
continue
|
1054 |
|
1055 |
return profile_names
|
1056 |
-
|
1057 |
-
def _generate_profile_summary(self, data: Dict) -> str:
|
1058 |
-
"""Generate markdown summary of the profile."""
|
1059 |
-
transcript = data.get("transcript", {})
|
1060 |
-
favorites = data.get("favorites", {})
|
1061 |
-
|
1062 |
-
# Extract just the learning style name
|
1063 |
-
learning_style = data.get("learning_style", "")
|
1064 |
-
if "Your primary learning style is:" in learning_style:
|
1065 |
-
style_match = re.search(r"Your primary learning style is: \*\*(.*?)\*\*", learning_style)
|
1066 |
-
if style_match:
|
1067 |
-
learning_style = style_match.group(1)
|
1068 |
-
|
1069 |
-
markdown = f"""## Student Profile: {data['name']}
|
1070 |
-
### Basic Information
|
1071 |
-
- **Age:** {data['age']}
|
1072 |
-
- **Interests:** {data.get('interests', 'Not specified')}
|
1073 |
-
- **Learning Style:** {learning_style}
|
1074 |
-
### Academic Information
|
1075 |
-
{self._format_transcript(transcript)}
|
1076 |
-
### Favorites
|
1077 |
-
- **Movie:** {favorites.get('movie', 'Not specified')}
|
1078 |
-
*Reason:* {favorites.get('movie_reason', 'Not specified')}
|
1079 |
-
- **TV Show:** {favorites.get('show', 'Not specified')}
|
1080 |
-
*Reason:* {favorites.get('show_reason', 'Not specified')}
|
1081 |
-
- **Book:** {favorites.get('book', 'Not specified')}
|
1082 |
-
*Reason:* {favorites.get('book_reason', 'Not specified')}
|
1083 |
-
- **Character:** {favorites.get('character', 'Not specified')}
|
1084 |
-
*Reason:* {favorites.get('character_reason', 'Not specified')}
|
1085 |
-
### Personal Blog
|
1086 |
-
{data.get('blog', '_No blog provided_')}
|
1087 |
-
"""
|
1088 |
-
return markdown
|
1089 |
-
|
1090 |
-
def _format_transcript(self, transcript: Dict) -> str:
|
1091 |
-
"""Format transcript data for display."""
|
1092 |
-
if not transcript or "course_history" not in transcript:
|
1093 |
-
return "_No transcript information available_"
|
1094 |
-
|
1095 |
-
display = "#### Course History\n"
|
1096 |
-
courses_by_year = defaultdict(list)
|
1097 |
-
for course in transcript.get("course_history", []):
|
1098 |
-
if course.get("school_year"):
|
1099 |
-
courses_by_year[course["school_year"]].append(course)
|
1100 |
-
|
1101 |
-
if courses_by_year:
|
1102 |
-
for year in sorted(courses_by_year.keys()):
|
1103 |
-
display += f"\n**{year}**\n"
|
1104 |
-
for course in courses_by_year[year]:
|
1105 |
-
display += f"- {course.get('course_code', '')} {course.get('description', 'Unnamed course')}"
|
1106 |
-
if 'grade' in course and course['grade']:
|
1107 |
-
display += f" (Grade: {course['grade']})"
|
1108 |
-
if 'credits' in course:
|
1109 |
-
display += f" | Credits: {course['credits']}"
|
1110 |
-
display += f" | Category: {course.get('requirement_category', 'N/A')}\n"
|
1111 |
-
|
1112 |
-
if 'student_info' in transcript:
|
1113 |
-
student = transcript['student_info']
|
1114 |
-
display += "\n**Academic Summary**\n"
|
1115 |
-
display += f"- Unweighted GPA: {student.get('unweighted_gpa', 'N/A')}\n"
|
1116 |
-
display += f"- Weighted GPA: {student.get('weighted_gpa', 'N/A')}\n"
|
1117 |
-
display += f"- Total Credits: {student.get('total_credits', 'N/A')}\n"
|
1118 |
-
|
1119 |
-
if 'graduation_status' in transcript:
|
1120 |
-
status = transcript['graduation_status']
|
1121 |
-
display += "\n**Graduation Progress**\n"
|
1122 |
-
display += f"- Completion: {status.get('percent_complete', 0)}%\n"
|
1123 |
-
display += f"- Credits Required: {status.get('total_required_credits', 0)}\n"
|
1124 |
-
display += f"- Credits Completed: {status.get('total_completed_credits', 0)}\n"
|
1125 |
-
display += f"- On Track: {'Yes' if status.get('on_track', False) else 'No'}\n"
|
1126 |
-
|
1127 |
-
return display
|
1128 |
|
1129 |
-
# Initialize profile manager
|
1130 |
profile_manager = ProfileManager()
|
1131 |
|
1132 |
# ========== AI TEACHING ASSISTANT ==========
|
1133 |
class TeachingAssistant:
|
1134 |
def __init__(self):
|
1135 |
self.context_history = []
|
1136 |
-
self.max_context_length = 5
|
1137 |
|
1138 |
async def generate_response(self, message: str, history: List[List[Union[str, None]]], session_token: str) -> str:
|
1139 |
-
"""Generate personalized response based on student profile and context."""
|
1140 |
try:
|
1141 |
-
# Load profile with session token
|
1142 |
profile = profile_manager.load_profile(session_token=session_token)
|
1143 |
if not profile:
|
1144 |
-
return "Please complete and save your profile first
|
1145 |
|
1146 |
-
# Update context history
|
1147 |
self._update_context(message, history)
|
1148 |
|
1149 |
-
#
|
1150 |
-
|
1151 |
-
|
1152 |
-
|
1153 |
-
gpa = profile.get("transcript", {}).get("student_info", {})
|
1154 |
-
interests = profile.get("interests", "")
|
1155 |
-
courses = profile.get("transcript", {}).get("course_history", [])
|
1156 |
-
favorites = profile.get("favorites", {})
|
1157 |
-
|
1158 |
-
# Process message with context
|
1159 |
-
response = await self._process_message(message, profile)
|
1160 |
|
1161 |
-
#
|
1162 |
-
|
1163 |
-
response += "\n\nWould you like me to suggest a study schedule based on your courses?"
|
1164 |
-
elif "course" in message.lower() or "class" in message.lower():
|
1165 |
-
response += "\n\nWould you like help finding resources for any of these courses?"
|
1166 |
-
|
1167 |
-
return response
|
1168 |
|
1169 |
except Exception as e:
|
1170 |
logging.error(f"Error generating response: {str(e)}")
|
1171 |
-
return "I encountered an error
|
1172 |
|
1173 |
def _update_context(self, message: str, history: List[List[Union[str, None]]]) -> None:
|
1174 |
-
"""Maintain conversation context."""
|
1175 |
self.context_history.append({"role": "user", "content": message})
|
1176 |
if history:
|
1177 |
for h in history[-self.max_context_length:]:
|
1178 |
-
if h[0]:
|
1179 |
self.context_history.append({"role": "user", "content": h[0]})
|
1180 |
-
if h[1]:
|
1181 |
self.context_history.append({"role": "assistant", "content": h[1]})
|
1182 |
|
1183 |
-
# Trim to maintain max context length
|
1184 |
self.context_history = self.context_history[-(self.max_context_length*2):]
|
1185 |
-
|
1186 |
-
async def _process_message(self, message: str, profile: Dict) -> str:
|
1187 |
-
"""Process user message with profile context."""
|
1188 |
-
message_lower = message.lower()
|
1189 |
-
|
1190 |
-
# Greetings
|
1191 |
-
if any(greet in message_lower for greet in ["hi", "hello", "hey", "greetings"]):
|
1192 |
-
return f"Hello {profile.get('name', 'there')}! How can I help you with your learning today?"
|
1193 |
-
|
1194 |
-
# Study help
|
1195 |
-
study_words = ["study", "learn", "prepare", "exam", "test", "homework"]
|
1196 |
-
if any(word in message_lower for word in study_words):
|
1197 |
-
return self._generate_study_advice(profile)
|
1198 |
-
|
1199 |
-
# Grade help
|
1200 |
-
grade_words = ["grade", "gpa", "score", "marks", "results"]
|
1201 |
-
if any(word in message_lower for word in grade_words):
|
1202 |
-
return self._generate_grade_advice(profile)
|
1203 |
-
|
1204 |
-
# Interest help
|
1205 |
-
interest_words = ["interest", "hobby", "passion", "extracurricular"]
|
1206 |
-
if any(word in message_lower for word in interest_words):
|
1207 |
-
return self._generate_interest_advice(profile)
|
1208 |
-
|
1209 |
-
# Course help
|
1210 |
-
course_words = ["courses", "classes", "transcript", "schedule", "subject"]
|
1211 |
-
if any(word in message_lower for word in course_words):
|
1212 |
-
return self._generate_course_advice(profile)
|
1213 |
-
|
1214 |
-
# Favorites
|
1215 |
-
favorite_words = ["movie", "show", "book", "character", "favorite"]
|
1216 |
-
if any(word in message_lower for word in favorite_words):
|
1217 |
-
return self._generate_favorites_response(profile)
|
1218 |
-
|
1219 |
-
# General help
|
1220 |
-
if "help" in message_lower:
|
1221 |
-
return self._generate_help_response()
|
1222 |
-
|
1223 |
-
# Default response
|
1224 |
-
return ("I'm your personalized teaching assistant. I can help with study tips, "
|
1225 |
-
"grade information, course advice, and more. Try asking about how to "
|
1226 |
-
"study effectively or about your course history.")
|
1227 |
-
|
1228 |
-
def _generate_study_advice(self, profile: Dict) -> str:
|
1229 |
-
"""Generate study advice based on learning style."""
|
1230 |
-
learning_style = profile.get("learning_style", "")
|
1231 |
-
response = ""
|
1232 |
-
|
1233 |
-
if "Visual" in learning_style:
|
1234 |
-
response = ("Based on your visual learning style, I recommend:\n"
|
1235 |
-
"- Creating colorful mind maps or diagrams\n"
|
1236 |
-
"- Using highlighters to color-code your notes\n"
|
1237 |
-
"- Watching educational videos on the topics\n"
|
1238 |
-
"- Creating flashcards with images\n\n")
|
1239 |
-
elif "Auditory" in learning_style:
|
1240 |
-
response = ("Based on your auditory learning style, I recommend:\n"
|
1241 |
-
"- Recording your notes and listening to them\n"
|
1242 |
-
"- Participating in study groups to discuss concepts\n"
|
1243 |
-
"- Explaining the material out loud to yourself\n"
|
1244 |
-
"- Finding podcasts or audio lectures on the topics\n\n")
|
1245 |
-
elif "Reading/Writing" in learning_style:
|
1246 |
-
response = ("Based on your reading/writing learning style, I recommend:\n"
|
1247 |
-
"- Writing detailed summaries in your own words\n"
|
1248 |
-
"- Creating organized outlines of the material\n"
|
1249 |
-
"- Reading additional textbooks or articles\n"
|
1250 |
-
"- Rewriting your notes to reinforce learning\n\n")
|
1251 |
-
elif "Kinesthetic" in learning_style:
|
1252 |
-
response = ("Based on your kinesthetic learning style, I recommend:\n"
|
1253 |
-
"- Creating physical models or demonstrations\n"
|
1254 |
-
"- Using hands-on activities to learn concepts\n"
|
1255 |
-
"- Taking frequent movement breaks while studying\n"
|
1256 |
-
"- Associating information with physical actions\n\n")
|
1257 |
-
else:
|
1258 |
-
response = ("Here are some general study tips:\n"
|
1259 |
-
"- Use the Pomodoro technique (25 min study, 5 min break)\n"
|
1260 |
-
"- Space out your study sessions over time\n"
|
1261 |
-
"- Test yourself with practice questions\n"
|
1262 |
-
"- Teach the material to someone else\n\n")
|
1263 |
-
|
1264 |
-
# Add time management advice
|
1265 |
-
response += ("**Time Management Tips**:\n"
|
1266 |
-
"- Create a study schedule and stick to it\n"
|
1267 |
-
"- Prioritize difficult subjects when you're most alert\n"
|
1268 |
-
"- Break large tasks into smaller, manageable chunks\n"
|
1269 |
-
"- Set specific goals for each study session")
|
1270 |
-
|
1271 |
-
return response
|
1272 |
-
|
1273 |
-
def _generate_grade_advice(self, profile: Dict) -> str:
|
1274 |
-
"""Generate response about grades and GPA."""
|
1275 |
-
gpa = profile.get("transcript", {}).get("student_info", {})
|
1276 |
-
courses = profile.get("transcript", {}).get("course_history", [])
|
1277 |
-
|
1278 |
-
response = (f"Your GPA information:\n"
|
1279 |
-
f"- Unweighted: {gpa.get('unweighted_gpa', 'N/A')}\n"
|
1280 |
-
f"- Weighted: {gpa.get('weighted_gpa', 'N/A')}\n\n")
|
1281 |
-
|
1282 |
-
# Identify any failing grades
|
1283 |
-
weak_subjects = []
|
1284 |
-
for course in courses:
|
1285 |
-
if course.get('grade', '').upper() in ['D', 'F']:
|
1286 |
-
weak_subjects.append(f"{course.get('course_code', '')} {course.get('description', 'Unknown course')}")
|
1287 |
-
|
1288 |
-
if weak_subjects:
|
1289 |
-
response += ("**Areas for Improvement**:\n"
|
1290 |
-
f"You might want to focus on these subjects: {', '.join(weak_subjects)}\n\n")
|
1291 |
-
|
1292 |
-
response += ("**Grade Improvement Strategies**:\n"
|
1293 |
-
"- Meet with your teachers to discuss your performance\n"
|
1294 |
-
"- Identify specific areas where you lost points\n"
|
1295 |
-
"- Create a targeted study plan for weak areas\n"
|
1296 |
-
"- Practice with past exams or sample questions")
|
1297 |
-
|
1298 |
-
return response
|
1299 |
-
|
1300 |
-
def _generate_interest_advice(self, profile: Dict) -> str:
|
1301 |
-
"""Generate response based on student interests."""
|
1302 |
-
interests = profile.get("interests", "")
|
1303 |
-
response = f"I see you're interested in: {interests}\n\n"
|
1304 |
-
|
1305 |
-
response += ("**Suggestions**:\n"
|
1306 |
-
"- Look for clubs or extracurricular activities related to these interests\n"
|
1307 |
-
"- Explore career paths that align with these interests\n"
|
1308 |
-
"- Find online communities or forums about these topics\n"
|
1309 |
-
"- Consider projects or independent study in these areas")
|
1310 |
-
|
1311 |
-
return response
|
1312 |
-
|
1313 |
-
def _generate_course_advice(self, profile: Dict) -> str:
|
1314 |
-
"""Generate response about courses."""
|
1315 |
-
courses = profile.get("transcript", {}).get("course_history", [])
|
1316 |
-
grade_level = profile.get("transcript", {}).get("student_info", {}).get("current_grade", "unknown")
|
1317 |
-
|
1318 |
-
response = "Here's a summary of your courses by year:\n"
|
1319 |
-
courses_by_year = defaultdict(list)
|
1320 |
-
for course in courses:
|
1321 |
-
if course.get("school_year"):
|
1322 |
-
courses_by_year[course["school_year"]].append(course)
|
1323 |
-
|
1324 |
-
for year in sorted(courses_by_year.keys()):
|
1325 |
-
response += f"\n**{year}**:\n"
|
1326 |
-
for course in courses_by_year[year]:
|
1327 |
-
response += f"- {course.get('course_code', '')} {course.get('description', 'Unnamed course')}"
|
1328 |
-
if 'grade' in course:
|
1329 |
-
response += f" (Grade: {course['grade']})"
|
1330 |
-
response += "\n"
|
1331 |
-
|
1332 |
-
response += f"\nAs a grade {grade_level} student, you might want to:\n"
|
1333 |
-
if grade_level in ["9", "10"]:
|
1334 |
-
response += ("- Focus on building strong foundational skills\n"
|
1335 |
-
"- Explore different subjects to find your interests\n"
|
1336 |
-
"- Start thinking about college/career requirements")
|
1337 |
-
elif grade_level in ["11", "12"]:
|
1338 |
-
response += ("- Focus on courses relevant to your college/career goals\n"
|
1339 |
-
"- Consider taking AP or advanced courses if available\n"
|
1340 |
-
"- Ensure you're meeting graduation requirements")
|
1341 |
-
|
1342 |
-
return response
|
1343 |
-
|
1344 |
-
def _generate_favorites_response(self, profile: Dict) -> str:
|
1345 |
-
"""Generate response about favorite items."""
|
1346 |
-
favorites = profile.get("favorites", {})
|
1347 |
-
response = "I see you enjoy:\n"
|
1348 |
-
|
1349 |
-
if favorites.get('movie'):
|
1350 |
-
response += f"- Movie: {favorites['movie']} ({favorites.get('movie_reason', 'no reason provided')})\n"
|
1351 |
-
if favorites.get('show'):
|
1352 |
-
response += f"- TV Show: {favorites['show']} ({favorites.get('show_reason', 'no reason provided')})\n"
|
1353 |
-
if favorites.get('book'):
|
1354 |
-
response += f"- Book: {favorites['book']} ({favorites.get('book_reason', 'no reason provided')})\n"
|
1355 |
-
if favorites.get('character'):
|
1356 |
-
response += f"- Character: {favorites['character']} ({favorites.get('character_reason', 'no reason provided')})\n"
|
1357 |
-
|
1358 |
-
response += "\nThese preferences suggest you might enjoy:\n"
|
1359 |
-
response += "- Similar books/movies in the same genre\n"
|
1360 |
-
response += "- Creative projects related to these stories\n"
|
1361 |
-
response += "- Analyzing themes or characters in your schoolwork"
|
1362 |
-
|
1363 |
-
return response
|
1364 |
-
|
1365 |
-
def _generate_help_response(self) -> str:
|
1366 |
-
"""Generate help response with available commands."""
|
1367 |
-
return ("""I can help with:
|
1368 |
-
- **Study tips**: "How should I study for math?"
|
1369 |
-
- **Grade information**: "What's my GPA?"
|
1370 |
-
- **Course advice**: "Show me my course history"
|
1371 |
-
- **Interest suggestions**: "What clubs match my interests?"
|
1372 |
-
- **General advice**: "How can I improve my grades?"
|
1373 |
-
Try asking about any of these topics!""")
|
1374 |
|
1375 |
-
# Initialize teaching assistant
|
1376 |
teaching_assistant = TeachingAssistant()
|
1377 |
|
1378 |
# ========== GRADIO INTERFACE ==========
|
1379 |
def create_interface():
|
1380 |
with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
|
1381 |
-
# Session state
|
1382 |
session_token = gr.State(value=generate_session_token())
|
1383 |
profile_manager.set_session(session_token.value)
|
1384 |
|
1385 |
-
# Track completion status for each tab
|
1386 |
tab_completed = gr.State({
|
1387 |
0: False, # Transcript Upload
|
1388 |
1: False, # Learning Style Quiz
|
@@ -1391,7 +705,7 @@ def create_interface():
|
|
1391 |
4: False # AI Assistant
|
1392 |
})
|
1393 |
|
1394 |
-
# Custom CSS
|
1395 |
app.css = """
|
1396 |
.gradio-container { max-width: 1200px !important; margin: 0 auto !important; }
|
1397 |
.tab-content { padding: 20px !important; border: 1px solid #e0e0e0 !important; border-radius: 8px !important; margin-top: 10px !important; }
|
@@ -1404,7 +718,6 @@ def create_interface():
|
|
1404 |
.quiz-results { margin-top: 20px; padding: 20px; background: #e8f5e9; border-radius: 8px; }
|
1405 |
.error-message { color: #d32f2f; background-color: #ffebee; padding: 10px; border-radius: 4px; margin: 10px 0; }
|
1406 |
|
1407 |
-
/* Dark mode support */
|
1408 |
.dark .tab-content { background-color: #2d2d2d !important; border-color: #444 !important; }
|
1409 |
.dark .quiz-question { background-color: #3d3d3d !important; }
|
1410 |
.dark .quiz-results { background-color: #2e3d2e !important; }
|
@@ -1414,7 +727,7 @@ def create_interface():
|
|
1414 |
.dark .chatbot .user, .dark .chatbot .assistant { color: #eee !important; }
|
1415 |
"""
|
1416 |
|
1417 |
-
# Header
|
1418 |
with gr.Row():
|
1419 |
with gr.Column(scale=4):
|
1420 |
gr.Markdown("""
|
@@ -1440,7 +753,7 @@ def create_interface():
|
|
1440 |
|
1441 |
nav_message = gr.HTML(visible=False)
|
1442 |
|
1443 |
-
# Main tabs
|
1444 |
with gr.Tabs(visible=True) as tabs:
|
1445 |
# ===== TAB 1: TRANSCRIPT UPLOAD =====
|
1446 |
with gr.Tab("Transcript", id=0):
|
@@ -1459,59 +772,25 @@ def create_interface():
|
|
1459 |
with gr.Column(scale=2):
|
1460 |
transcript_output = gr.Textbox(
|
1461 |
label="Analysis Results",
|
1462 |
-
lines=
|
1463 |
interactive=False
|
1464 |
)
|
1465 |
transcript_data = gr.State()
|
1466 |
|
1467 |
-
def process_transcript(file_obj, current_tab_status):
|
1468 |
-
try:
|
1469 |
-
if not file_obj:
|
1470 |
-
raise ValueError("Please upload a transcript file first.")
|
1471 |
-
|
1472 |
-
output_text, data = parse_transcript(file_obj)
|
1473 |
-
|
1474 |
-
if "Error" in output_text:
|
1475 |
-
return (
|
1476 |
-
output_text,
|
1477 |
-
None,
|
1478 |
-
current_tab_status,
|
1479 |
-
gr.update(),
|
1480 |
-
gr.update(),
|
1481 |
-
gr.update(visible=True, value=f"<div class='error-message'>{output_text}</div>"),
|
1482 |
-
gr.update(visible=False)
|
1483 |
-
)
|
1484 |
-
|
1485 |
-
new_status = current_tab_status.copy()
|
1486 |
-
new_status[0] = True
|
1487 |
-
return (
|
1488 |
-
output_text,
|
1489 |
-
data,
|
1490 |
-
new_status,
|
1491 |
-
gr.update(elem_classes="completed-tab"),
|
1492 |
-
gr.update(interactive=True),
|
1493 |
-
gr.update(visible=False),
|
1494 |
-
gr.update(visible=False)
|
1495 |
-
)
|
1496 |
-
|
1497 |
-
except Exception as e:
|
1498 |
-
error_msg = f"Error processing transcript: {str(e)}"
|
1499 |
-
if "PDF" in str(e):
|
1500 |
-
error_msg += "\n\nTIPS:\n- Try converting to image (screenshot)\n- Ensure text is selectable in PDF\n- Try a different PDF reader"
|
1501 |
-
return (
|
1502 |
-
error_msg,
|
1503 |
-
None,
|
1504 |
-
current_tab_status,
|
1505 |
-
gr.update(),
|
1506 |
-
gr.update(),
|
1507 |
-
gr.update(visible=True, value=f"<div class='error-message'>{error_msg}</div>"),
|
1508 |
-
gr.update(visible=False)
|
1509 |
-
)
|
1510 |
-
|
1511 |
upload_btn.click(
|
1512 |
-
|
1513 |
inputs=[file_input, tab_completed],
|
1514 |
-
outputs=[transcript_output, transcript_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1515 |
)
|
1516 |
|
1517 |
# ===== TAB 2: LEARNING STYLE QUIZ =====
|
@@ -1542,7 +821,6 @@ def create_interface():
|
|
1542 |
elem_classes="quiz-results"
|
1543 |
)
|
1544 |
|
1545 |
-
# Update progress bar as questions are answered
|
1546 |
for component in quiz_components:
|
1547 |
component.change(
|
1548 |
fn=lambda *answers: {
|
@@ -1554,38 +832,23 @@ def create_interface():
|
|
1554 |
outputs=progress
|
1555 |
)
|
1556 |
|
1557 |
-
def submit_quiz_and_update(*args):
|
1558 |
-
current_tab_status = args[0]
|
1559 |
-
answers = args[1:]
|
1560 |
-
|
1561 |
-
try:
|
1562 |
-
result = learning_style_quiz.evaluate_quiz(*answers)
|
1563 |
-
new_status = current_tab_status.copy()
|
1564 |
-
new_status[1] = True
|
1565 |
-
return (
|
1566 |
-
result,
|
1567 |
-
gr.update(visible=True),
|
1568 |
-
new_status,
|
1569 |
-
gr.update(elem_classes="completed-tab"),
|
1570 |
-
gr.update(interactive=True),
|
1571 |
-
gr.update(value="<div class='alert-box'>Quiz submitted successfully!</div>", visible=True),
|
1572 |
-
gr.update(visible=False)
|
1573 |
-
)
|
1574 |
-
except Exception as e:
|
1575 |
-
return (
|
1576 |
-
f"Error evaluating quiz: {str(e)}",
|
1577 |
-
gr.update(visible=True),
|
1578 |
-
current_tab_status,
|
1579 |
-
gr.update(),
|
1580 |
-
gr.update(),
|
1581 |
-
gr.update(value=f"<div class='error-message'>Error: {str(e)}</div>", visible=True),
|
1582 |
-
gr.update(visible=False)
|
1583 |
-
)
|
1584 |
-
|
1585 |
quiz_submit.click(
|
1586 |
-
fn=
|
1587 |
-
inputs=
|
1588 |
-
outputs=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1589 |
)
|
1590 |
|
1591 |
quiz_clear.click(
|
@@ -1624,42 +887,22 @@ def create_interface():
|
|
1624 |
character = gr.Textbox(label="Favorite Character (from any story)")
|
1625 |
character_reason = gr.Textbox(label="Why do you like them?", lines=2)
|
1626 |
|
1627 |
-
# Added blog section
|
1628 |
with gr.Accordion("Personal Blog (Optional)", open=False):
|
1629 |
blog = gr.Textbox(
|
1630 |
label="Share your thoughts",
|
1631 |
-
placeholder="Write something about yourself
|
1632 |
lines=5
|
1633 |
)
|
1634 |
|
1635 |
-
def save_personal_info(name, age, interests, current_tab_status):
|
1636 |
-
try:
|
1637 |
-
name = validate_name(name)
|
1638 |
-
age = validate_age(age)
|
1639 |
-
interests = sanitize_input(interests)
|
1640 |
-
|
1641 |
-
new_status = current_tab_status.copy()
|
1642 |
-
new_status[2] = True
|
1643 |
-
return (
|
1644 |
-
new_status,
|
1645 |
-
gr.update(elem_classes="completed-tab"),
|
1646 |
-
gr.update(interactive=True),
|
1647 |
-
gr.update(value="<div class='alert-box'>Information saved!</div>", visible=True),
|
1648 |
-
gr.update(visible=False)
|
1649 |
-
)
|
1650 |
-
except Exception as e:
|
1651 |
-
return (
|
1652 |
-
current_tab_status,
|
1653 |
-
gr.update(),
|
1654 |
-
gr.update(),
|
1655 |
-
gr.update(visible=False),
|
1656 |
-
gr.update(visible=True, value=f"<div class='error-message'>Error: {str(e)}</div>")
|
1657 |
-
)
|
1658 |
-
|
1659 |
save_personal_btn.click(
|
1660 |
-
fn=
|
|
|
|
|
|
|
|
|
|
|
1661 |
inputs=[name, age, interests, tab_completed],
|
1662 |
-
outputs=[tab_completed, step3, step4, save_confirmation
|
1663 |
)
|
1664 |
|
1665 |
# ===== TAB 4: SAVE & REVIEW =====
|
@@ -1686,69 +929,24 @@ def create_interface():
|
|
1686 |
label="Profile Summary"
|
1687 |
)
|
1688 |
|
1689 |
-
def save_profile_and_update(name, age, interests, transcript_data, learning_style,
|
1690 |
-
movie, movie_reason, show, show_reason,
|
1691 |
-
book, book_reason, character, character_reason, blog,
|
1692 |
-
current_tab_status):
|
1693 |
-
try:
|
1694 |
-
summary = profile_manager.save_profile(
|
1695 |
-
name, age, interests, transcript_data, learning_style,
|
1696 |
-
movie, movie_reason, show, show_reason,
|
1697 |
-
book, book_reason, character, character_reason, blog
|
1698 |
-
)
|
1699 |
-
new_status = current_tab_status.copy()
|
1700 |
-
new_status[3] = True
|
1701 |
-
return (
|
1702 |
-
summary,
|
1703 |
-
new_status,
|
1704 |
-
gr.update(elem_classes="completed-tab"),
|
1705 |
-
gr.update(interactive=True),
|
1706 |
-
gr.update(visible=False)
|
1707 |
-
)
|
1708 |
-
except Exception as e:
|
1709 |
-
return (
|
1710 |
-
f"Error saving profile: {str(e)}",
|
1711 |
-
current_tab_status,
|
1712 |
-
gr.update(),
|
1713 |
-
gr.update(),
|
1714 |
-
gr.update(visible=True, value=f"<div class='error-message'>Error: {str(e)}</div>")
|
1715 |
-
)
|
1716 |
-
|
1717 |
save_btn.click(
|
1718 |
-
fn=
|
1719 |
inputs=[
|
1720 |
name, age, interests, transcript_data, learning_output,
|
1721 |
movie, movie_reason, show, show_reason,
|
1722 |
-
book, book_reason, character, character_reason, blog
|
1723 |
-
tab_completed
|
1724 |
],
|
1725 |
-
outputs=
|
1726 |
).then(
|
1727 |
-
fn=lambda:
|
1728 |
-
|
|
|
1729 |
).then(
|
1730 |
-
fn=lambda: gr.update(
|
1731 |
-
outputs=
|
1732 |
).then(
|
1733 |
-
fn=lambda: gr.update(
|
1734 |
-
outputs=
|
1735 |
-
)
|
1736 |
-
|
1737 |
-
def delete_profile(name, session_token):
|
1738 |
-
if not name:
|
1739 |
-
raise gr.Error("Please select a profile to delete")
|
1740 |
-
try:
|
1741 |
-
profile_path = profile_manager.get_profile_path(name)
|
1742 |
-
if profile_path.exists():
|
1743 |
-
profile_path.unlink()
|
1744 |
-
return "Profile deleted successfully", ""
|
1745 |
-
except Exception as e:
|
1746 |
-
raise gr.Error(f"Error deleting profile: {str(e)}")
|
1747 |
-
|
1748 |
-
delete_btn.click(
|
1749 |
-
fn=delete_profile,
|
1750 |
-
inputs=[load_profile_dropdown, session_token],
|
1751 |
-
outputs=[output_summary, load_profile_dropdown]
|
1752 |
).then(
|
1753 |
fn=lambda: profile_manager.list_profiles(session_token.value),
|
1754 |
outputs=load_profile_dropdown
|
@@ -1759,23 +957,12 @@ def create_interface():
|
|
1759 |
fn=lambda: gr.update(visible=bool(profile_manager.list_profiles(session_token.value))),
|
1760 |
outputs=delete_btn
|
1761 |
)
|
1762 |
-
|
1763 |
-
clear_btn.click(
|
1764 |
-
fn=lambda: [gr.update(value="") for _ in range(12)],
|
1765 |
-
outputs=[
|
1766 |
-
name, age, interests,
|
1767 |
-
movie, movie_reason, show, show_reason,
|
1768 |
-
book, book_reason, character, character_reason,
|
1769 |
-
output_summary
|
1770 |
-
]
|
1771 |
-
)
|
1772 |
|
1773 |
# ===== TAB 5: AI ASSISTANT =====
|
1774 |
with gr.Tab("AI Assistant", id=4):
|
1775 |
gr.Markdown("## Your Personalized Learning Assistant")
|
1776 |
gr.Markdown("Ask me anything about studying, your courses, grades, or learning strategies.")
|
1777 |
|
1778 |
-
# Create a wrapper function that properly awaits the async function
|
1779 |
async def chat_wrapper(message: str, history: List[List[str]]):
|
1780 |
response = await teaching_assistant.generate_response(
|
1781 |
message,
|
@@ -1787,11 +974,10 @@ def create_interface():
|
|
1787 |
chatbot = gr.ChatInterface(
|
1788 |
fn=chat_wrapper,
|
1789 |
examples=[
|
1790 |
-
"
|
1791 |
-
"
|
1792 |
-
"
|
1793 |
-
"
|
1794 |
-
"What study methods match my learning style?"
|
1795 |
],
|
1796 |
title=""
|
1797 |
)
|
@@ -1800,11 +986,9 @@ def create_interface():
|
|
1800 |
def navigate_to_tab(tab_index: int, tab_completed_status):
|
1801 |
current_tab = tabs.selected
|
1802 |
|
1803 |
-
# Allow backward navigation
|
1804 |
if tab_index <= current_tab:
|
1805 |
return gr.Tabs(selected=tab_index), gr.update(visible=False)
|
1806 |
|
1807 |
-
# Check if current tab is completed
|
1808 |
if not tab_completed_status.get(current_tab, False):
|
1809 |
messages = {
|
1810 |
0: "Please complete the transcript analysis first.",
|
@@ -1822,7 +1006,6 @@ def create_interface():
|
|
1822 |
|
1823 |
return gr.Tabs(selected=tab_index), gr.update(visible=False)
|
1824 |
|
1825 |
-
# Connect navigation buttons
|
1826 |
step1.click(
|
1827 |
lambda idx, status: navigate_to_tab(idx, status),
|
1828 |
inputs=[gr.State(0), tab_completed],
|
@@ -1864,7 +1047,6 @@ def create_interface():
|
|
1864 |
|
1865 |
return app
|
1866 |
|
1867 |
-
# Create and launch the interface
|
1868 |
app = create_interface()
|
1869 |
|
1870 |
if __name__ == "__main__":
|
|
|
41 |
filename='transcript_parser.log'
|
42 |
)
|
43 |
|
44 |
+
# Model configuration - Using smaller model
|
45 |
+
MODEL_NAME = "deepseek-ai/deepseek-llm-1.3b"
|
46 |
|
47 |
# Initialize Hugging Face API
|
48 |
if HF_TOKEN:
|
|
|
52 |
except Exception as e:
|
53 |
logging.error(f"Failed to initialize Hugging Face API: {str(e)}")
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
# ========== MODEL LOADER ==========
|
56 |
class ModelLoader:
|
57 |
def __init__(self):
|
|
|
68 |
if progress:
|
69 |
progress(0.1, desc="Checking GPU availability...")
|
70 |
|
|
|
71 |
torch.cuda.empty_cache()
|
72 |
|
73 |
if progress:
|
|
|
81 |
if progress:
|
82 |
progress(0.5, desc="Loading model (this may take a few minutes)...")
|
83 |
|
|
|
84 |
model_kwargs = {
|
85 |
"trust_remote_code": True,
|
86 |
"torch_dtype": torch.float16 if self.device == "cuda" else torch.float32,
|
87 |
"device_map": "auto" if self.device == "cuda" else None,
|
88 |
"low_cpu_mem_usage": True,
|
89 |
+
"offload_folder": "offload"
|
90 |
}
|
91 |
|
92 |
try:
|
|
|
95 |
**model_kwargs
|
96 |
)
|
97 |
except torch.cuda.OutOfMemoryError:
|
|
|
98 |
model_kwargs["device_map"] = None
|
99 |
model = AutoModelForCausalLM.from_pretrained(
|
100 |
MODEL_NAME,
|
|
|
102 |
).to('cpu')
|
103 |
self.device = 'cpu'
|
104 |
|
|
|
105 |
test_input = tokenizer("Test", return_tensors="pt").to(self.device)
|
106 |
_ = model.generate(**test_input, max_new_tokens=1)
|
107 |
|
|
|
119 |
# Initialize model loader
|
120 |
model_loader = ModelLoader()
|
121 |
|
122 |
+
@lru_cache(maxsize=1)
def get_model_and_tokenizer():
    """Return the result of ``model_loader.load_model()``, computed once.

    The single-slot ``lru_cache`` means later calls reuse the value produced
    by the first successful call instead of invoking the loader again.
    """
    loaded = model_loader.load_model()
    return loaded
|
125 |
+
|
126 |
# ========== UTILITY FUNCTIONS ==========
|
127 |
def generate_session_token() -> str:
    """Build a random alphanumeric session token.

    Returns:
        A string of ``SESSION_TOKEN_LENGTH`` characters drawn from ASCII
        letters and digits using the ``secrets`` module.
    """
    charset = string.ascii_letters + string.digits
    picks = [secrets.choice(charset) for _ in range(SESSION_TOKEN_LENGTH)]
    return ''.join(picks)
|
130 |
|
131 |
def sanitize_input(text: str) -> str:
    """Return *text* with markup and unexpected characters removed, HTML-escaped.

    The text is trimmed, anything shaped like an HTML tag is dropped,
    characters outside the whitelist are removed, and the remainder is
    HTML-escaped.

    Args:
        text: Raw user-supplied text; may be empty or ``None``.

    Returns:
        The cleaned text, or ``""`` when *text* is falsy.
    """
    if not text:
        return ""
    # Tags must be removed BEFORE escaping: once "<" has become "&lt;" the tag
    # pattern can never match, and the whitelist below would then strip the
    # ";" out of every entity, leaving fragments such as "&ltb&gt".
    text = re.sub(r'<[^>]*>', '', text.strip())
    text = re.sub(r'[^\w\s\-.,!?@#\$%^&*()+=]', '', text)
    # Escape last so any surviving "&" is emitted as a complete entity.
    return html.escape(text)
|
138 |
|
139 |
def validate_name(name: str) -> str:
|
|
|
140 |
name = name.strip()
|
141 |
if not name:
|
142 |
+
raise ValueError("Name cannot be empty.")
|
143 |
if len(name) > 100:
|
144 |
raise ValueError("Name is too long (maximum 100 characters).")
|
145 |
if any(c.isdigit() for c in name):
|
|
|
147 |
return name
|
148 |
|
149 |
def validate_age(age: Union[int, float, str]) -> int:
|
|
|
150 |
try:
|
151 |
age_int = int(age)
|
152 |
if not MIN_AGE <= age_int <= MAX_AGE:
|
|
|
156 |
raise ValueError("Please enter a valid age number.")
|
157 |
|
158 |
def validate_file(file_obj) -> None:
|
|
|
159 |
if not file_obj:
|
160 |
raise ValueError("Please upload a file first")
|
161 |
|
|
|
163 |
if file_ext not in ALLOWED_FILE_TYPES:
|
164 |
raise ValueError(f"Invalid file type. Allowed types: {', '.join(ALLOWED_FILE_TYPES)}")
|
165 |
|
166 |
+
file_size = os.path.getsize(file_obj.name) / (1024 * 1024)
|
167 |
if file_size > MAX_FILE_SIZE_MB:
|
168 |
raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
|
169 |
|
170 |
# ========== TEXT EXTRACTION FUNCTIONS ==========
|
171 |
def extract_text_from_file(file_path: str, file_ext: str) -> str:
|
|
|
172 |
text = ""
|
173 |
|
174 |
try:
|
175 |
if file_ext == '.pdf':
|
|
|
176 |
try:
|
177 |
doc = fitz.open(file_path)
|
178 |
for page in doc:
|
179 |
text += page.get_text("text") + '\n'
|
180 |
if not text.strip():
|
181 |
+
raise ValueError("PyMuPDF returned empty text")
|
182 |
except Exception as e:
|
183 |
logging.warning(f"PyMuPDF failed: {str(e)}. Trying OCR fallback...")
|
184 |
text = extract_text_from_pdf_with_ocr(file_path)
|
|
|
186 |
elif file_ext in ['.png', '.jpg', '.jpeg']:
|
187 |
text = extract_text_with_ocr(file_path)
|
188 |
|
|
|
189 |
text = clean_extracted_text(text)
|
190 |
|
191 |
if not text.strip():
|
192 |
+
raise ValueError("No text could be extracted.")
|
193 |
|
194 |
return text
|
195 |
|
196 |
except Exception as e:
|
197 |
logging.error(f"Text extraction error: {str(e)}")
|
198 |
+
raise gr.Error(f"Failed to extract text: {str(e)}")
|
199 |
|
200 |
def extract_text_from_pdf_with_ocr(file_path: str) -> str:
    """Run OCR over every page of a PDF and return the combined text.

    Each page is rendered to a pixmap, converted to greyscale, thresholded
    at 128 into pure black/white, and handed to Tesseract.

    Args:
        file_path: Path of the PDF file to process.

    Returns:
        The recognised text of all pages, each page followed by a newline.

    Raises:
        ValueError: If opening, rendering, or recognising the document fails.
            The underlying exception is attached as ``__cause__``.
    """
    page_texts = []
    try:
        # The context manager closes the document even if a page fails,
        # so the file handle is not leaked on the error path.
        with fitz.open(file_path) as doc:
            for page in doc:
                pix = page.get_pixmap()
                img = Image.open(io.BytesIO(pix.tobytes()))
                img = img.convert('L')
                # Binarise: anything darker than mid-grey becomes black.
                img = img.point(lambda x: 0 if x < 128 else 255)
                page_texts.append(
                    pytesseract.image_to_string(img, config='--psm 6 --oem 3') + '\n'
                )
    except Exception as e:
        raise ValueError(f"PDF OCR failed: {str(e)}") from e
    return "".join(page_texts)
|
213 |
|
214 |
def extract_text_with_ocr(file_path: str) -> str:
    """Recognise the text in an image file with Tesseract.

    The image is converted to greyscale and thresholded at 128 into a
    1-bit black/white image before recognition.

    Args:
        file_path: Path of the image file to process.

    Returns:
        The text returned by Tesseract.

    Raises:
        ValueError: If the image cannot be opened or OCR fails. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        # Scope the file handle: convert() yields an image that no longer
        # depends on the open file, so the source can be closed right away.
        with Image.open(file_path) as source:
            image = source.convert('L')
        image = image.point(lambda x: 0 if x < 128 else 255, '1')
        custom_config = r'--oem 3 --psm 6'
        return pytesseract.image_to_string(image, config=custom_config)
    except Exception as e:
        raise ValueError(f"OCR processing failed: {str(e)}") from e
|
224 |
|
225 |
def clean_extracted_text(text: str) -> str:
|
|
|
|
|
226 |
text = re.sub(r'\s+', ' ', text).strip()
|
|
|
|
|
227 |
replacements = {
|
228 |
'|': 'I',
|
229 |
'‘': "'",
|
|
|
233 |
'fi': 'fi',
|
234 |
'fl': 'fl'
|
235 |
}
|
|
|
236 |
for wrong, right in replacements.items():
|
237 |
text = text.replace(wrong, right)
|
|
|
238 |
return text
|
239 |
|
240 |
def remove_sensitive_info(text: str) -> str:
    """Mask e-mail addresses, SSN-shaped numbers and long numeric IDs.

    Args:
        text: Text to scrub.

    Returns:
        The text with e-mail addresses replaced by ``[EMAIL]``,
        ``ddd-dd-dddd`` numbers by ``[REDACTED]`` and standalone runs of
        6-9 digits by ``[ID]``.
    """
    # E-mails go first: the ID pass would otherwise rewrite an all-digit
    # local part to "[ID]" and leave the rest of the address unredacted.
    # The domain class covers every digit and the TLD class contains
    # letters only (no literal '|').
    text = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '[EMAIL]', text)
    text = re.sub(r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED]', text)
    text = re.sub(r'\b\d{6,9}\b', '[ID]', text)
    return text
|
245 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
# ========== TRANSCRIPT PARSING ==========
|
247 |
class TranscriptParser:
|
248 |
def __init__(self):
|
|
|
253 |
self.graduation_status = {}
|
254 |
|
255 |
def parse_transcript(self, text: str) -> Dict:
    """Extract student details and course rows from transcript text.

    Args:
        text: Plain text of the transcript.

    Returns:
        A dict with two keys: ``'student_info'`` (optional ``'name'``,
        ``'id'`` and ``'gpa'`` entries, present only when found) and
        ``'course_history'`` (a list of dicts with ``'course_code'``,
        ``'description'``, ``'grade'`` and ``'credits'``).

    Raises:
        ValueError: If anything goes wrong while parsing; the original
            exception is attached as ``__cause__``.
    """
    try:
        parsed_data = {
            'student_info': {},
            'course_history': []
        }

        # Extract student information
        name_match = re.search(r'(?:Name|Student)[:\s]+([A-Za-z,\s]+)', text, re.IGNORECASE)
        if name_match:
            parsed_data['student_info']['name'] = name_match.group(1).strip()

        id_match = re.search(r'(?:ID|Student\s*ID)[:\s]+([A-Za-z0-9-]+)', text, re.IGNORECASE)
        if id_match:
            parsed_data['student_info']['id'] = id_match.group(1).strip()

        # The number pattern must end on a digit so sentence punctuation
        # ("GPA: 3.5.") is not captured and float() cannot fail on it.
        gpa_match = re.search(
            r'(?:GPA|Grade\s*Point\s*Average)[:\s]+(\d*\.?\d+)', text, re.IGNORECASE
        )
        if gpa_match:
            parsed_data['student_info']['gpa'] = float(gpa_match.group(1))

        # Extract courses (simplified pattern): code, description, letter
        # grade, credits. Credits use the same well-formed number pattern.
        course_pattern = r'([A-Z]{2,4}\s?\d{3})\s+(.*?)\s+([A-F][+-]?)\s+(\d*\.?\d+)'
        for code, description, grade, credits in re.findall(course_pattern, text):
            parsed_data['course_history'].append({
                'course_code': code,
                'description': description,
                'grade': grade,
                'credits': float(credits)
            })

        return parsed_data

    except Exception as e:
        logging.error(f"Error parsing transcript: {str(e)}")
        raise ValueError(f"Couldn't parse transcript: {str(e)}") from e
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
292 |
|
293 |
def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
|
294 |
+
"""Process transcript file and return simple confirmation"""
|
295 |
try:
|
296 |
if not file_obj:
|
297 |
raise ValueError("Please upload a file first")
|
|
|
299 |
validate_file(file_obj)
|
300 |
file_ext = os.path.splitext(file_obj.name)[1].lower()
|
301 |
|
|
|
302 |
if progress:
|
303 |
progress(0.2, desc="Extracting text from file...")
|
304 |
|
305 |
text = extract_text_from_file(file_obj.name, file_ext)
|
306 |
|
307 |
if not text.strip():
|
308 |
+
raise ValueError("No text could be extracted from the file.")
|
309 |
|
|
|
310 |
if progress:
|
311 |
+
progress(0.5, desc="Parsing transcript...")
|
312 |
|
313 |
parser = TranscriptParser()
|
314 |
+
parsed_data = parser.parse_transcript(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
|
316 |
+
# Return simple confirmation message
|
317 |
+
confirmation = "Transcript processed successfully."
|
318 |
+
if 'gpa' in parsed_data.get('student_info', {}):
|
319 |
+
confirmation += f"\nGPA detected: {parsed_data['student_info']['gpa']}"
|
320 |
+
|
321 |
+
return confirmation, parsed_data
|
322 |
|
323 |
except Exception as e:
|
324 |
error_msg = f"Error processing transcript: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
325 |
logging.error(error_msg)
|
326 |
return error_msg, None
|
327 |
|
|
|
434 |
}
|
435 |
|
436 |
def evaluate_quiz(self, *answers) -> str:
|
437 |
+
"""Evaluate quiz answers and return learning style results"""
|
438 |
+
answers = list(answers)
|
439 |
if len(answers) != len(self.questions):
|
440 |
raise gr.Error("Please answer all questions before submitting")
|
441 |
|
|
|
443 |
|
444 |
for i, answer in enumerate(answers):
|
445 |
if not answer:
|
446 |
+
continue
|
447 |
|
448 |
for j, style in enumerate(self.learning_styles):
|
449 |
if answer == self.options[i][j]:
|
|
|
457 |
percentages = {style: (score/total_answered)*100 for style, score in scores.items()}
|
458 |
sorted_styles = sorted(scores.items(), key=lambda x: x[1], reverse=True)
|
459 |
|
|
|
460 |
result = "## Your Learning Style Results\n\n"
|
461 |
result += "### Scores:\n"
|
462 |
for style, score in sorted_styles:
|
|
|
482 |
for career in style_info['careers'][:6]:
|
483 |
result += f"- {career}\n"
|
484 |
|
|
|
485 |
complementary = [s for s in sorted_styles if s[0] != primary_style][0][0]
|
486 |
result += f"\nYou might also benefit from some **{complementary}** strategies:\n"
|
487 |
for tip in self.learning_styles[complementary]['tips'][:3]:
|
|
|
504 |
|
505 |
return result
|
506 |
|
|
|
507 |
learning_style_quiz = LearningStyleQuiz()
|
508 |
|
509 |
# ========== PROFILE MANAGEMENT ==========
|
|
|
514 |
self.current_session = None
|
515 |
|
516 |
def set_session(self, session_token: str) -> None:
    """Remember ``session_token`` as this manager's current session."""
    self.current_session = session_token
|
518 |
|
519 |
def get_profile_path(self, name: str) -> Path:
    """Return the JSON file path used to store the profile for ``name``.

    The file name is built from the first 16 hex characters of the
    SHA-256 of ``name``, so the user-supplied name never reaches the
    filesystem path directly. When a session is active its token is
    included in the file name.

    Args:
        name: Profile owner's name.

    Returns:
        ``<hash>_<session>_profile.json`` inside ``self.profiles_dir`` when
        a session is set, otherwise ``<hash>_profile.json``.
    """
    name_hash = hashlib.sha256(name.encode()).hexdigest()[:16]
    if self.current_session:
        return self.profiles_dir / f"{name_hash}_{self.current_session}_profile.json"
    # load_profile looks up "<hash>_profile.json" when no session token is
    # given, so the session-less path must use the hash as well.
    return self.profiles_dir / f"{name_hash}_profile.json"
|
|
|
527 |
movie: str, movie_reason: str, show: str, show_reason: str,
|
528 |
book: str, book_reason: str, character: str, character_reason: str,
|
529 |
blog: str) -> str:
|
|
|
530 |
try:
|
531 |
+
name = validate_name(name)
|
532 |
+
age = validate_age(age)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
533 |
|
534 |
if not interests.strip():
|
535 |
raise ValueError("Please describe at least one interest or hobby.")
|
|
|
537 |
if not transcript:
|
538 |
raise ValueError("Please complete the transcript analysis first.")
|
539 |
|
|
|
540 |
if not learning_style or "Your primary learning style is:" not in learning_style:
|
541 |
raise ValueError("Please complete the learning style quiz first.")
|
542 |
|
|
|
543 |
favorites = {
|
544 |
"movie": sanitize_input(movie),
|
545 |
"movie_reason": sanitize_input(movie_reason),
|
|
|
551 |
"character_reason": sanitize_input(character_reason)
|
552 |
}
|
553 |
|
|
|
554 |
data = {
|
555 |
"name": name,
|
556 |
+
"age": age,
|
557 |
"interests": sanitize_input(interests),
|
558 |
+
"transcript": transcript,
|
559 |
+
"learning_style": learning_style,
|
560 |
"favorites": favorites,
|
561 |
"blog": sanitize_input(blog) if blog else "",
|
562 |
"session_token": self.current_session,
|
563 |
"last_updated": time.time()
|
564 |
}
|
565 |
|
|
|
566 |
filepath = self.get_profile_path(name)
|
567 |
|
568 |
with open(filepath, "w", encoding='utf-8') as f:
|
569 |
json.dump(data, f, indent=2, ensure_ascii=False)
|
570 |
|
|
|
571 |
if HF_TOKEN and 'hf_api' in globals():
|
572 |
try:
|
573 |
hf_api.upload_file(
|
|
|
579 |
except Exception as e:
|
580 |
logging.error(f"Failed to upload to HF Hub: {str(e)}")
|
581 |
|
582 |
+
# Return simple confirmation with GPA if available
|
583 |
+
confirmation = f"Profile saved successfully for {name}."
|
584 |
+
if 'gpa' in data.get('transcript', {}).get('student_info', {}):
|
585 |
+
confirmation += f"\nGPA: {data['transcript']['student_info']['gpa']}"
|
586 |
+
return confirmation
|
587 |
|
588 |
except Exception as e:
|
589 |
logging.error(f"Profile validation error: {str(e)}")
|
590 |
raise gr.Error(f"Couldn't save profile: {str(e)}")
|
591 |
+
|
592 |
def load_profile(self, name: str = None, session_token: str = None) -> Dict:
|
|
|
593 |
try:
|
594 |
if session_token:
|
595 |
profile_pattern = f"*{session_token}_profile.json"
|
|
|
601 |
return {}
|
602 |
|
603 |
if name:
|
|
|
604 |
name_hash = hashlib.sha256(name.encode()).hexdigest()[:16]
|
605 |
if session_token:
|
606 |
profile_file = self.profiles_dir / f"{name_hash}_{session_token}_profile.json"
|
|
|
608 |
profile_file = self.profiles_dir / f"{name_hash}_profile.json"
|
609 |
|
610 |
if not profile_file.exists():
|
|
|
611 |
if HF_TOKEN and 'hf_api' in globals():
|
612 |
try:
|
613 |
hf_api.download_file(
|
|
|
621 |
else:
|
622 |
raise gr.Error(f"No profile found for {name}")
|
623 |
else:
|
|
|
624 |
profile_file = profiles[0]
|
625 |
|
626 |
with open(profile_file, "r", encoding='utf-8') as f:
|
627 |
profile_data = json.load(f)
|
|
|
628 |
if time.time() - profile_data.get('last_updated', 0) > SESSION_TIMEOUT:
|
629 |
raise gr.Error("Session expired. Please start a new session.")
|
630 |
return profile_data
|
|
|
634 |
return {}
|
635 |
|
636 |
def list_profiles(self, session_token: str = None) -> List[str]:
|
|
|
637 |
if session_token:
|
638 |
profiles = list(self.profiles_dir.glob(f"*{session_token}_profile.json"))
|
639 |
else:
|
640 |
profiles = list(self.profiles_dir.glob("*.json"))
|
641 |
|
|
|
642 |
profile_names = []
|
643 |
for p in profiles:
|
644 |
with open(p, "r", encoding='utf-8') as f:
|
|
|
649 |
continue
|
650 |
|
651 |
return profile_names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
652 |
|
|
|
653 |
profile_manager = ProfileManager()
|
654 |
|
655 |
# ========== AI TEACHING ASSISTANT ==========
|
656 |
class TeachingAssistant:
    """Rule-based chat helper that answers from the saved student profile."""

    def __init__(self):
        # Most recent chat messages as {"role", "content"} dicts, oldest first.
        self.context_history = []
        # Number of past exchanges kept; the stored window holds at most
        # twice this many messages.
        self.max_context_length = 5

    async def generate_response(self, message: str, history: List[List[Union[str, None]]], session_token: str) -> str:
        """Return the assistant's reply to ``message``.

        Args:
            message: The user's latest chat message.
            history: Previous chat turns as ``[user_text, assistant_text]``
                pairs; either element may be ``None``.
            session_token: Token used to look up the saved profile.

        Returns:
            A prompt to save a profile when none is found, the stored GPA
            when the message mentions "gpa" (case-insensitive), otherwise a
            generic greeting. Any exception is logged and turned into a
            fixed error reply instead of propagating.
        """
        try:
            profile = profile_manager.load_profile(session_token=session_token)
            if not profile:
                return "Please complete and save your profile first."

            self._update_context(message, history)

            # Focus on GPA if mentioned
            if "gpa" in message.lower():
                gpa = profile.get("transcript", {}).get("student_info", {}).get("gpa", "unknown")
                return f"Your GPA is {gpa}. Would you like advice on improving it?"

            # Generic response otherwise
            return "I'm your learning assistant. Ask me about your GPA, courses, or study tips."

        except Exception as e:
            logging.error(f"Error generating response: {str(e)}")
            return "I encountered an error. Please try again."

    def _update_context(self, message: str, history: List[List[Union[str, None]]]) -> None:
        """Rebuild ``context_history`` from recent history plus ``message``.

        The window is rebuilt from ``history`` on every call rather than
        appended to, so earlier turns are not duplicated. The new message
        is placed last so that truncation keeps it and drops the oldest
        entry instead.

        Args:
            message: The user's latest chat message.
            history: Previous chat turns as ``[user_text, assistant_text]``
                pairs; empty or ``None`` elements are skipped.
        """
        window = []
        for h in (history or [])[-self.max_context_length:]:
            if h[0]:
                window.append({"role": "user", "content": h[0]})
            if h[1]:
                window.append({"role": "assistant", "content": h[1]})
        window.append({"role": "user", "content": message})

        self.context_history = window[-(self.max_context_length * 2):]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
691 |
|
|
|
692 |
teaching_assistant = TeachingAssistant()
|
693 |
|
694 |
# ========== GRADIO INTERFACE ==========
|
695 |
def create_interface():
|
696 |
with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
|
|
|
697 |
session_token = gr.State(value=generate_session_token())
|
698 |
profile_manager.set_session(session_token.value)
|
699 |
|
|
|
700 |
tab_completed = gr.State({
|
701 |
0: False, # Transcript Upload
|
702 |
1: False, # Learning Style Quiz
|
|
|
705 |
4: False # AI Assistant
|
706 |
})
|
707 |
|
708 |
+
# Custom CSS
|
709 |
app.css = """
|
710 |
.gradio-container { max-width: 1200px !important; margin: 0 auto !important; }
|
711 |
.tab-content { padding: 20px !important; border: 1px solid #e0e0e0 !important; border-radius: 8px !important; margin-top: 10px !important; }
|
|
|
718 |
.quiz-results { margin-top: 20px; padding: 20px; background: #e8f5e9; border-radius: 8px; }
|
719 |
.error-message { color: #d32f2f; background-color: #ffebee; padding: 10px; border-radius: 4px; margin: 10px 0; }
|
720 |
|
|
|
721 |
.dark .tab-content { background-color: #2d2d2d !important; border-color: #444 !important; }
|
722 |
.dark .quiz-question { background-color: #3d3d3d !important; }
|
723 |
.dark .quiz-results { background-color: #2e3d2e !important; }
|
|
|
727 |
.dark .chatbot .user, .dark .chatbot .assistant { color: #eee !important; }
|
728 |
"""
|
729 |
|
730 |
+
# Header
|
731 |
with gr.Row():
|
732 |
with gr.Column(scale=4):
|
733 |
gr.Markdown("""
|
|
|
753 |
|
754 |
nav_message = gr.HTML(visible=False)
|
755 |
|
756 |
+
# Main tabs
|
757 |
with gr.Tabs(visible=True) as tabs:
|
758 |
# ===== TAB 1: TRANSCRIPT UPLOAD =====
|
759 |
with gr.Tab("Transcript", id=0):
|
|
|
772 |
with gr.Column(scale=2):
|
773 |
transcript_output = gr.Textbox(
|
774 |
label="Analysis Results",
|
775 |
+
lines=5,
|
776 |
interactive=False
|
777 |
)
|
778 |
transcript_data = gr.State()
|
779 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
780 |
upload_btn.click(
|
781 |
+
fn=parse_transcript,
|
782 |
inputs=[file_input, tab_completed],
|
783 |
+
outputs=[transcript_output, transcript_data]
|
784 |
+
).then(
|
785 |
+
fn=lambda: {0: True},
|
786 |
+
inputs=None,
|
787 |
+
outputs=tab_completed
|
788 |
+
).then(
|
789 |
+
fn=lambda: gr.update(elem_classes="completed-tab"),
|
790 |
+
outputs=step1
|
791 |
+
).then(
|
792 |
+
fn=lambda: gr.update(interactive=True),
|
793 |
+
outputs=step2
|
794 |
)
|
795 |
|
796 |
# ===== TAB 2: LEARNING STYLE QUIZ =====
|
|
|
821 |
elem_classes="quiz-results"
|
822 |
)
|
823 |
|
|
|
824 |
for component in quiz_components:
|
825 |
component.change(
|
826 |
fn=lambda *answers: {
|
|
|
832 |
outputs=progress
|
833 |
)
|
834 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
835 |
quiz_submit.click(
|
836 |
+
fn=lambda *answers: learning_style_quiz.evaluate_quiz(*answers),
|
837 |
+
inputs=quiz_components,
|
838 |
+
outputs=learning_output
|
839 |
+
).then(
|
840 |
+
fn=lambda: gr.update(visible=True),
|
841 |
+
outputs=learning_output
|
842 |
+
).then(
|
843 |
+
fn=lambda: {1: True},
|
844 |
+
inputs=None,
|
845 |
+
outputs=tab_completed
|
846 |
+
).then(
|
847 |
+
fn=lambda: gr.update(elem_classes="completed-tab"),
|
848 |
+
outputs=step2
|
849 |
+
).then(
|
850 |
+
fn=lambda: gr.update(interactive=True),
|
851 |
+
outputs=step3
|
852 |
)
|
853 |
|
854 |
quiz_clear.click(
|
|
|
887 |
character = gr.Textbox(label="Favorite Character (from any story)")
|
888 |
character_reason = gr.Textbox(label="Why do you like them?", lines=2)
|
889 |
|
|
|
890 |
with gr.Accordion("Personal Blog (Optional)", open=False):
|
891 |
blog = gr.Textbox(
|
892 |
label="Share your thoughts",
|
893 |
+
placeholder="Write something about yourself...",
|
894 |
lines=5
|
895 |
)
|
896 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
897 |
save_personal_btn.click(
|
898 |
+
fn=lambda n, a, i, ts: (
|
899 |
+
{2: True},
|
900 |
+
gr.update(elem_classes="completed-tab"),
|
901 |
+
gr.update(interactive=True),
|
902 |
+
gr.update(value="<div class='alert-box'>Information saved!</div>", visible=True)
|
903 |
+
),
|
904 |
inputs=[name, age, interests, tab_completed],
|
905 |
+
outputs=[tab_completed, step3, step4, save_confirmation]
|
906 |
)
|
907 |
|
908 |
# ===== TAB 4: SAVE & REVIEW =====
|
|
|
929 |
label="Profile Summary"
|
930 |
)
|
931 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
932 |
save_btn.click(
|
933 |
+
fn=profile_manager.save_profile,
|
934 |
inputs=[
|
935 |
name, age, interests, transcript_data, learning_output,
|
936 |
movie, movie_reason, show, show_reason,
|
937 |
+
book, book_reason, character, character_reason, blog
|
|
|
938 |
],
|
939 |
+
outputs=output_summary
|
940 |
).then(
|
941 |
+
fn=lambda: {3: True},
|
942 |
+
inputs=None,
|
943 |
+
outputs=tab_completed
|
944 |
).then(
|
945 |
+
fn=lambda: gr.update(elem_classes="completed-tab"),
|
946 |
+
outputs=step4
|
947 |
).then(
|
948 |
+
fn=lambda: gr.update(interactive=True),
|
949 |
+
outputs=step5
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
950 |
).then(
|
951 |
fn=lambda: profile_manager.list_profiles(session_token.value),
|
952 |
outputs=load_profile_dropdown
|
|
|
957 |
fn=lambda: gr.update(visible=bool(profile_manager.list_profiles(session_token.value))),
|
958 |
outputs=delete_btn
|
959 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
960 |
|
961 |
# ===== TAB 5: AI ASSISTANT =====
|
962 |
with gr.Tab("AI Assistant", id=4):
|
963 |
gr.Markdown("## Your Personalized Learning Assistant")
|
964 |
gr.Markdown("Ask me anything about studying, your courses, grades, or learning strategies.")
|
965 |
|
|
|
966 |
async def chat_wrapper(message: str, history: List[List[str]]):
|
967 |
response = await teaching_assistant.generate_response(
|
968 |
message,
|
|
|
974 |
chatbot = gr.ChatInterface(
|
975 |
fn=chat_wrapper,
|
976 |
examples=[
|
977 |
+
"What's my GPA?",
|
978 |
+
"How should I study for math?",
|
979 |
+
"What courses am I taking?",
|
980 |
+
"Study tips for my learning style"
|
|
|
981 |
],
|
982 |
title=""
|
983 |
)
|
|
|
986 |
def navigate_to_tab(tab_index: int, tab_completed_status):
|
987 |
current_tab = tabs.selected
|
988 |
|
|
|
989 |
if tab_index <= current_tab:
|
990 |
return gr.Tabs(selected=tab_index), gr.update(visible=False)
|
991 |
|
|
|
992 |
if not tab_completed_status.get(current_tab, False):
|
993 |
messages = {
|
994 |
0: "Please complete the transcript analysis first.",
|
|
|
1006 |
|
1007 |
return gr.Tabs(selected=tab_index), gr.update(visible=False)
|
1008 |
|
|
|
1009 |
step1.click(
|
1010 |
lambda idx, status: navigate_to_tab(idx, status),
|
1011 |
inputs=[gr.State(0), tab_completed],
|
|
|
1047 |
|
1048 |
return app
|
1049 |
|
|
|
1050 |
app = create_interface()
|
1051 |
|
1052 |
if __name__ == "__main__":
|