Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -272,115 +272,184 @@ class TranscriptParser:
|
|
272 |
self.requirements = {}
|
273 |
self.current_courses = []
|
274 |
self.course_history = []
|
|
|
275 |
|
276 |
def parse_transcript(self, text: str) -> Dict:
|
277 |
-
"""
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
290 |
|
291 |
def _extract_student_info(self, text: str):
|
292 |
-
"""Enhanced student info extraction
|
293 |
-
#
|
294 |
-
|
295 |
-
|
296 |
-
r"(\d{7})\s*[-]?\s*([\w\s,]+?)\s*"
|
297 |
-
r"(?:\||Cohort\s*\w+\s*\||Un-weighted\s*GPA\s*([\d.]+)\s*\||Comm\s*Serv\s*Hours\s*(\d+))?"
|
298 |
-
)
|
299 |
|
300 |
-
|
301 |
-
if header_match:
|
302 |
self.student_data = {
|
303 |
-
"id":
|
304 |
-
"name":
|
305 |
-
"
|
306 |
-
"
|
307 |
}
|
308 |
|
309 |
-
#
|
310 |
-
|
311 |
-
|
312 |
-
r"(?:\||YOG\s*[:]?\s*(\d{4})\s*\||Weighted\s*GPA\s*([\d.]+)\s*\||Total\s*Credits\s*Earned\s*([\d.]+))?"
|
313 |
-
)
|
314 |
|
315 |
-
|
316 |
-
if grade_match:
|
317 |
self.student_data.update({
|
318 |
-
"
|
319 |
-
"
|
320 |
-
|
321 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
322 |
})
|
323 |
|
324 |
def _extract_requirements(self, text: str):
|
325 |
-
"""Parse the graduation requirements section"""
|
326 |
-
|
327 |
-
|
328 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
)
|
330 |
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
337 |
}
|
338 |
|
339 |
def _extract_course_history(self, text: str):
|
340 |
-
"""Parse the detailed course history"""
|
341 |
-
|
342 |
-
|
343 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
344 |
)
|
345 |
|
346 |
-
|
|
|
|
|
347 |
self.course_history.append({
|
348 |
-
"requirement_category":
|
349 |
-
"school_year":
|
350 |
-
"grade_level":
|
351 |
-
"course_code":
|
352 |
-
"description":
|
353 |
-
"term":
|
354 |
-
"district_number":
|
355 |
-
"grade":
|
356 |
-
"inclusion_status":
|
357 |
-
"credits":
|
358 |
})
|
359 |
|
360 |
def _extract_current_courses(self, text: str):
|
361 |
"""Identify courses currently in progress"""
|
362 |
-
in_progress = [c for c in self.course_history if "inProgress" in c["credits"]]
|
363 |
self.current_courses = [
|
364 |
{
|
365 |
"course": c["description"],
|
|
|
366 |
"category": c["requirement_category"],
|
367 |
"term": c["term"],
|
368 |
-
"credits": c["credits"]
|
|
|
369 |
}
|
370 |
-
for c in
|
|
|
371 |
]
|
372 |
|
373 |
-
def _calculate_completion(self)
|
374 |
-
"""Calculate overall completion status"""
|
375 |
total_required = sum(req["required"] for req in self.requirements.values())
|
376 |
total_completed = sum(req["completed"] for req in self.requirements.values())
|
377 |
|
378 |
-
|
379 |
-
"
|
380 |
-
"
|
381 |
"percent_complete": round((total_completed / total_required) * 100, 1),
|
382 |
-
"remaining_credits": total_required - total_completed
|
383 |
-
|
|
|
384 |
|
385 |
def to_json(self) -> str:
|
386 |
"""Export parsed data as JSON"""
|
@@ -389,51 +458,92 @@ class TranscriptParser:
|
|
389 |
"requirements": self.requirements,
|
390 |
"current_courses": self.current_courses,
|
391 |
"course_history": self.course_history,
|
392 |
-
"
|
393 |
}, indent=2)
|
394 |
|
395 |
-
def
|
396 |
-
"""
|
397 |
-
|
398 |
-
|
399 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
400 |
|
401 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
402 |
try:
|
|
|
403 |
if progress:
|
404 |
-
progress(0.1, desc="
|
|
|
405 |
parser = TranscriptParser()
|
406 |
parsed_data = parser.parse_transcript(text)
|
|
|
407 |
if progress:
|
408 |
-
progress(0.
|
409 |
-
|
410 |
-
# Convert to expected format
|
411 |
-
formatted_data = {
|
412 |
-
"grade_level": parsed_data["student_info"].get("current_grade", "Unknown"),
|
413 |
-
"gpa": {
|
414 |
-
"weighted": parsed_data["student_info"].get("weighted_gpa", "N/A"),
|
415 |
-
"unweighted": parsed_data["student_info"].get("unweighted_gpa", "N/A")
|
416 |
-
},
|
417 |
-
"courses": []
|
418 |
-
}
|
419 |
|
420 |
-
|
421 |
-
for course in parsed_data["course_history"]:
|
422 |
-
formatted_data["courses"].append({
|
423 |
-
"code": course["course_code"],
|
424 |
-
"name": course["description"],
|
425 |
-
"grade": course["grade"],
|
426 |
-
"credits": course["credits"],
|
427 |
-
"year": course["school_year"],
|
428 |
-
"grade_level": course["grade_level"]
|
429 |
-
})
|
430 |
|
431 |
-
if progress:
|
432 |
-
progress(1.0)
|
433 |
-
return formatted_data
|
434 |
-
|
435 |
except Exception as e:
|
436 |
logging.warning(f"Structured parsing failed, falling back to AI: {str(e)}")
|
|
|
437 |
# Fall back to AI parsing if structured parsing fails
|
438 |
return parse_transcript_with_ai_fallback(text, progress)
|
439 |
|
@@ -447,6 +557,8 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
|
|
447 |
- Current grade level
|
448 |
- Weighted GPA (if available)
|
449 |
- Unweighted GPA (if available)
|
|
|
|
|
450 |
- List of all courses with:
|
451 |
* Course code
|
452 |
* Course name
|
@@ -454,6 +566,7 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
|
|
454 |
* Credits earned
|
455 |
* Year/semester taken
|
456 |
* Grade level when taken
|
|
|
457 |
Return the data in JSON format.
|
458 |
|
459 |
Transcript Text:
|
@@ -464,6 +577,10 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
|
|
464 |
if progress:
|
465 |
progress(0.1, desc="Processing transcript with AI...")
|
466 |
|
|
|
|
|
|
|
|
|
467 |
# Tokenize and generate response
|
468 |
inputs = tokenizer(prompt, return_tensors="pt").to(model_loader.device)
|
469 |
if progress:
|
@@ -471,7 +588,7 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
|
|
471 |
|
472 |
outputs = model.generate(
|
473 |
**inputs,
|
474 |
-
max_new_tokens=
|
475 |
temperature=0.1,
|
476 |
do_sample=True
|
477 |
)
|
@@ -500,43 +617,8 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
|
|
500 |
logging.error(f"AI parsing error: {str(e)}")
|
501 |
raise gr.Error(f"Error processing transcript: {str(e)}")
|
502 |
|
503 |
-
def format_transcript_output(data: Dict) -> str:
|
504 |
-
"""Format the parsed data into human-readable text."""
|
505 |
-
output = []
|
506 |
-
output.append(f"Student Transcript Summary\n{'='*40}")
|
507 |
-
output.append(f"Current Grade Level: {data.get('grade_level', 'Unknown')}")
|
508 |
-
|
509 |
-
if 'gpa' in data:
|
510 |
-
output.append(f"\nGPA:")
|
511 |
-
output.append(f"- Weighted: {data['gpa'].get('weighted', 'N/A')}")
|
512 |
-
output.append(f"- Unweighted: {data['gpa'].get('unweighted', 'N/A')}")
|
513 |
-
|
514 |
-
if 'courses' in data:
|
515 |
-
output.append("\nCourse History:\n" + '='*40)
|
516 |
-
|
517 |
-
# Group courses by grade level
|
518 |
-
courses_by_grade = defaultdict(list)
|
519 |
-
for course in data['courses']:
|
520 |
-
grade_level = course.get('grade_level', 'Unknown')
|
521 |
-
courses_by_grade[grade_level].append(course)
|
522 |
-
|
523 |
-
# Sort grades numerically
|
524 |
-
for grade in sorted(courses_by_grade.keys(), key=lambda x: int(x) if x.isdigit() else x):
|
525 |
-
output.append(f"\nGrade {grade}:\n{'-'*30}")
|
526 |
-
for course in courses_by_grade[grade]:
|
527 |
-
course_str = f"- {course.get('code', '')} {course.get('name', 'Unnamed course')}"
|
528 |
-
if 'grade' in course:
|
529 |
-
course_str += f" (Grade: {course['grade']})"
|
530 |
-
if 'credits' in course:
|
531 |
-
course_str += f" | Credits: {course['credits']}"
|
532 |
-
if 'year' in course:
|
533 |
-
course_str += f" | Year: {course['year']}"
|
534 |
-
output.append(course_str)
|
535 |
-
|
536 |
-
return '\n'.join(output)
|
537 |
-
|
538 |
def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
|
539 |
-
"""Main function to parse transcript files
|
540 |
try:
|
541 |
if not file_obj:
|
542 |
raise ValueError("Please upload a file first")
|
@@ -544,32 +626,40 @@ def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Di
|
|
544 |
validate_file(file_obj)
|
545 |
file_ext = os.path.splitext(file_obj.name)[1].lower()
|
546 |
|
547 |
-
# Extract text from file
|
|
|
|
|
|
|
548 |
text = extract_text_from_file(file_obj.name, file_ext)
|
549 |
|
550 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
551 |
parsed_data = parse_transcript_with_ai(text, progress)
|
552 |
|
553 |
# Format output text
|
554 |
-
|
555 |
-
|
556 |
-
# Prepare the data structure for saving
|
557 |
-
transcript_data = {
|
558 |
-
"grade_level": parsed_data.get('grade_level', 'Unknown'),
|
559 |
-
"gpa": parsed_data.get('gpa', {}),
|
560 |
-
"courses": defaultdict(list)
|
561 |
-
}
|
562 |
|
563 |
-
|
564 |
-
for course in parsed_data.get('courses', []):
|
565 |
-
grade_level = course.get('grade_level', 'Unknown')
|
566 |
-
transcript_data["courses"][grade_level].append(course)
|
567 |
|
568 |
-
return output_text,
|
569 |
|
570 |
except Exception as e:
|
571 |
-
|
572 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
573 |
|
574 |
# ========== LEARNING STYLE QUIZ ==========
|
575 |
class LearningStyleQuiz:
|
@@ -1449,7 +1539,7 @@ def create_interface():
|
|
1449 |
"Your profile summary will appear here after saving.",
|
1450 |
label="Profile Summary"
|
1451 |
)
|
1452 |
-
blog = gr.Textbox(label="Personal Blog", visible=False)
|
1453 |
|
1454 |
def save_profile_and_update(name, age, interests, transcript_data, learning_style,
|
1455 |
movie, movie_reason, show, show_reason,
|
@@ -1606,3 +1696,4 @@ app = create_interface()
|
|
1606 |
|
1607 |
if __name__ == "__main__":
|
1608 |
app.launch()
|
|
|
|
272 |
self.requirements = {}
|
273 |
self.current_courses = []
|
274 |
self.course_history = []
|
275 |
+
self.graduation_status = {}
|
276 |
|
277 |
def parse_transcript(self, text: str) -> Dict:
    """Parse Miami-Dade style transcript text into structured data.

    Whitespace in ``text`` is collapsed to single spaces, then the private
    extraction helpers run in a fixed order (student info, requirements,
    course history, current courses, completion totals). Later helpers read
    what earlier ones stored on the instance, so the order matters.

    All parser state is cleared first so that calling this method twice on
    the same instance does not accumulate duplicate course rows.

    Args:
        text: Raw text extracted from the transcript file.

    Returns:
        A dict with the keys ``student_info``, ``requirements``,
        ``current_courses``, ``course_history`` and ``graduation_status``.

    Raises:
        gr.Error: If any extraction step fails. The original exception is
            chained as ``__cause__``.
    """
    # Start from a clean slate: _extract_course_history appends and
    # _extract_requirements inserts, so stale data would otherwise leak
    # into the result of a second parse.
    self.student_data = {}
    self.requirements = {}
    self.current_courses = []
    self.course_history = []
    self.graduation_status = {}

    try:
        # First normalize the text (replace multiple spaces, normalize line breaks)
        text = re.sub(r'\s+', ' ', text)

        # Extract student info with more flexible patterns
        self._extract_student_info(text)

        # Extract requirements with better table parsing
        self._extract_requirements(text)

        # Extract course history with improved pattern matching
        self._extract_course_history(text)

        # Identify current courses (derived from the course history above)
        self._extract_current_courses(text)

        # Calculate completion status (derived from the requirements above)
        self._calculate_completion()

        return {
            "student_info": self.student_data,
            "requirements": self.requirements,
            "current_courses": self.current_courses,
            "course_history": self.course_history,
            "graduation_status": self.graduation_status
        }

    except Exception as e:
        logging.error(f"Error parsing transcript: {str(e)}")
        # Chain the cause so the underlying failure stays visible in tracebacks.
        raise gr.Error(f"Error parsing transcript: {str(e)}") from e
|
309 |
|
310 |
def _extract_student_info(self, text: str):
|
311 |
+
"""Enhanced student info extraction for Miami-Dade format"""
|
312 |
+
# Extract basic student info
|
313 |
+
student_pattern = r"(\d{7})\s*-\s*([A-Z]+,\s*[A-Z]+)\s*Current Grade:\s*(\d+)\s*YOG\s*(\d{4})"
|
314 |
+
student_match = re.search(student_pattern, text, re.IGNORECASE)
|
|
|
|
|
|
|
315 |
|
316 |
+
if student_match:
|
|
|
317 |
self.student_data = {
|
318 |
+
"id": student_match.group(1),
|
319 |
+
"name": student_match.group(2).replace(",", ", "),
|
320 |
+
"current_grade": student_match.group(3),
|
321 |
+
"graduation_year": student_match.group(4)
|
322 |
}
|
323 |
|
324 |
+
# Extract GPA info
|
325 |
+
gpa_pattern = r"Un-weighted GPA\s*([\d.]+).*?Weighted GPA\s*([\d.]+)"
|
326 |
+
gpa_match = re.search(gpa_pattern, text, re.IGNORECASE)
|
|
|
|
|
327 |
|
328 |
+
if gpa_match:
|
|
|
329 |
self.student_data.update({
|
330 |
+
"unweighted_gpa": float(gpa_match.group(1)),
|
331 |
+
"weighted_gpa": float(gpa_match.group(2))
|
332 |
+
})
|
333 |
+
|
334 |
+
# Extract credits and service hours
|
335 |
+
credits_pattern = r"Total Credits Earned\s*([\d.]+).*?Comm Serv Hours\s*(\d+)"
|
336 |
+
credits_match = re.search(credits_pattern, text, re.IGNORECASE)
|
337 |
+
|
338 |
+
if credits_match:
|
339 |
+
self.student_data.update({
|
340 |
+
"total_credits": float(credits_match.group(1)),
|
341 |
+
"community_service_hours": int(credits_match.group(2))
|
342 |
})
|
343 |
|
344 |
def _extract_requirements(self, text: str):
|
345 |
+
"""Parse the graduation requirements section with improved table parsing"""
|
346 |
+
# Find the requirements table
|
347 |
+
req_table_start = re.search(r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status", text)
|
348 |
+
if not req_table_start:
|
349 |
+
raise ValueError("Could not find requirements table header")
|
350 |
+
|
351 |
+
req_text = text[req_table_start.start():]
|
352 |
+
|
353 |
+
# Extract individual requirements
|
354 |
+
req_pattern = (
|
355 |
+
r"([A-Z]-[\w\s\(\)&]+)\s*" # Code
|
356 |
+
r"([^\|]+)\s*" # Description
|
357 |
+
r"([\d.]+)\s*" # Required
|
358 |
+
r"([\d.]+)\s*" # Waived
|
359 |
+
r"([\d.]+)\s*" # Completed
|
360 |
+
r"([\d.]+)\s*%" # Status
|
361 |
)
|
362 |
|
363 |
+
req_matches = re.finditer(req_pattern, req_text)
|
364 |
+
|
365 |
+
for match in req_matches:
|
366 |
+
req_code = match.group(1).strip()
|
367 |
+
self.requirements[req_code] = {
|
368 |
+
"description": match.group(2).strip(),
|
369 |
+
"required": float(match.group(3)),
|
370 |
+
"waived": float(match.group(4)),
|
371 |
+
"completed": float(match.group(5)),
|
372 |
+
"status": f"{match.group(6)}%"
|
373 |
+
}
|
374 |
+
|
375 |
+
# Extract total requirements
|
376 |
+
total_pattern = r"Total\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)%"
|
377 |
+
total_match = re.search(total_pattern, req_text)
|
378 |
+
|
379 |
+
if total_match:
|
380 |
+
self.graduation_status["total_requirements"] = {
|
381 |
+
"required": float(total_match.group(1)),
|
382 |
+
"waived": float(total_match.group(2)),
|
383 |
+
"completed": float(total_match.group(3)),
|
384 |
+
"percent_complete": float(total_match.group(4))
|
385 |
}
|
386 |
|
387 |
def _extract_course_history(self, text: str):
|
388 |
+
"""Parse the detailed course history with improved pattern matching"""
|
389 |
+
# Find the course history table
|
390 |
+
course_header = re.search(r"Requirement\s*School Year\s*GradeLv1\s*CrsNu m\s*Description\s*Term\s*DstNumber\s*FG\s*Incl\s*Credits", text)
|
391 |
+
if not course_header:
|
392 |
+
raise ValueError("Could not find course history table header")
|
393 |
+
|
394 |
+
course_text = text[course_header.start():]
|
395 |
+
|
396 |
+
# Extract individual courses
|
397 |
+
course_pattern = (
|
398 |
+
r"([A-Z]-[\w\s\(\)&-]+)\s*" # Requirement
|
399 |
+
r"(\d{4}-\d{4})\s*" # School Year
|
400 |
+
r"(\d{2})\s*" # Grade Level
|
401 |
+
r"([A-Z0-9]+)\s*" # Course Number
|
402 |
+
r"([^\|]+)\s*" # Description
|
403 |
+
r"([A-Z0-9]+)\s*" # Term
|
404 |
+
r"([A-Z0-9]+)\s*" # District Number
|
405 |
+
r"([A-Z])\s*" # Final Grade
|
406 |
+
r"([A-Z])\s*" # Inclusion Status
|
407 |
+
r"([\d.]+|inProgress)" # Credits
|
408 |
)
|
409 |
|
410 |
+
course_matches = re.finditer(course_pattern, course_text)
|
411 |
+
|
412 |
+
for match in course_matches:
|
413 |
self.course_history.append({
|
414 |
+
"requirement_category": match.group(1).strip(),
|
415 |
+
"school_year": match.group(2),
|
416 |
+
"grade_level": match.group(3),
|
417 |
+
"course_code": match.group(4),
|
418 |
+
"description": match.group(5).strip(),
|
419 |
+
"term": match.group(6),
|
420 |
+
"district_number": match.group(7),
|
421 |
+
"grade": match.group(8),
|
422 |
+
"inclusion_status": match.group(9),
|
423 |
+
"credits": match.group(10)
|
424 |
})
|
425 |
|
426 |
def _extract_current_courses(self, text: str):
|
427 |
"""Identify courses currently in progress"""
|
|
|
428 |
self.current_courses = [
|
429 |
{
|
430 |
"course": c["description"],
|
431 |
+
"code": c["course_code"],
|
432 |
"category": c["requirement_category"],
|
433 |
"term": c["term"],
|
434 |
+
"credits": c["credits"],
|
435 |
+
"grade_level": c["grade_level"]
|
436 |
}
|
437 |
+
for c in self.course_history
|
438 |
+
if c["credits"].lower() == "inprogress"
|
439 |
]
|
440 |
|
441 |
+
def _calculate_completion(self):
|
442 |
+
"""Calculate overall completion status with more detailed info"""
|
443 |
total_required = sum(req["required"] for req in self.requirements.values())
|
444 |
total_completed = sum(req["completed"] for req in self.requirements.values())
|
445 |
|
446 |
+
self.graduation_status.update({
|
447 |
+
"total_required_credits": total_required,
|
448 |
+
"total_completed_credits": total_completed,
|
449 |
"percent_complete": round((total_completed / total_required) * 100, 1),
|
450 |
+
"remaining_credits": total_required - total_completed,
|
451 |
+
"on_track": (total_completed / total_required) >= 0.75 # 75% completion considered on track
|
452 |
+
})
|
453 |
|
454 |
def to_json(self) -> str:
|
455 |
"""Export parsed data as JSON"""
|
|
|
458 |
"requirements": self.requirements,
|
459 |
"current_courses": self.current_courses,
|
460 |
"course_history": self.course_history,
|
461 |
+
"graduation_status": self.graduation_status
|
462 |
}, indent=2)
|
463 |
|
464 |
+
def format_transcript_output(data: Dict) -> str:
    """Format parsed transcript data as a Markdown-style text report.

    The report has up to five sections: student summary, graduation
    progress, detailed requirements, in-progress courses (only when any
    exist) and course history grouped by school year (only when any exist).

    Every field is read with a fallback, so partially populated data (a
    missing section or a row lacking a key) produces placeholder text
    rather than raising ``KeyError``.

    Args:
        data: Mapping with the optional keys ``student_info``,
            ``graduation_status``, ``requirements``, ``current_courses``
            and ``course_history``, as returned by
            ``TranscriptParser.parse_transcript``.

    Returns:
        The report as a single newline-joined string.
    """
    output = []

    # Student Info Section
    student = data.get("student_info") or {}
    output.append(f"## Student Transcript Summary\n{'='*50}")
    output.append(f"**Name:** {student.get('name', 'Unknown')}")
    output.append(f"**Student ID:** {student.get('id', 'Unknown')}")
    output.append(f"**Current Grade:** {student.get('current_grade', 'Unknown')}")
    output.append(f"**Graduation Year:** {student.get('graduation_year', 'Unknown')}")
    output.append(f"**Unweighted GPA:** {student.get('unweighted_gpa', 'N/A')}")
    output.append(f"**Weighted GPA:** {student.get('weighted_gpa', 'N/A')}")
    output.append(f"**Total Credits Earned:** {student.get('total_credits', 'N/A')}")
    output.append(f"**Community Service Hours:** {student.get('community_service_hours', 'N/A')}\n")

    # Graduation Requirements Section
    grad_status = data.get("graduation_status") or {}
    output.append(f"## Graduation Progress\n{'='*50}")
    output.append(f"**Overall Completion:** {grad_status.get('percent_complete', 0)}%")
    output.append(f"**Credits Required:** {grad_status.get('total_required_credits', 0)}")
    output.append(f"**Credits Completed:** {grad_status.get('total_completed_credits', 0)}")
    output.append(f"**Credits Remaining:** {grad_status.get('remaining_credits', 0)}")
    output.append(f"**On Track to Graduate:** {'Yes' if grad_status.get('on_track', False) else 'No'}\n")

    # Detailed Requirements
    output.append("### Detailed Requirements:")
    for code, req in (data.get("requirements") or {}).items():
        output.append(
            f"- **{code}**: {req.get('description', '')}\n"
            f" Required: {req.get('required', 'N/A')} | Completed: {req.get('completed', 'N/A')} | "
            f"Status: {req.get('status', 'N/A')}"
        )
    output.append("")

    # Current Courses
    current_courses = data.get("current_courses")
    if current_courses:
        output.append("## Current Courses (In Progress)\n" + '='*50)
        for course in current_courses:
            output.append(
                f"- **{course.get('code', '')} {course.get('course', 'Unnamed course')}**\n"
                f" Category: {course.get('category', 'N/A')} | "
                f"Grade Level: {course.get('grade_level', 'N/A')} | "
                f"Term: {course.get('term', 'N/A')} | Credits: {course.get('credits', 'N/A')}"
            )
        output.append("")

    # Course History by Year
    courses_by_year = defaultdict(list)
    for course in data.get("course_history") or []:
        courses_by_year[course.get("school_year", "Unknown")].append(course)

    if courses_by_year:
        output.append("## Course History\n" + '='*50)
        # key=str keeps the sort well-defined even if a year is not a string.
        for year in sorted(courses_by_year, key=str):
            output.append(f"\n### {year}")
            for course in courses_by_year[year]:
                output.append(
                    f"- **{course.get('course_code', '')} {course.get('description', 'Unnamed course')}**\n"
                    f" Grade: {course.get('grade', 'N/A')} | Credits: {course.get('credits', 'N/A')} | "
                    f"Category: {course.get('requirement_category', 'N/A')} | Term: {course.get('term', 'N/A')}"
                )

    return '\n'.join(output)
|
528 |
+
|
529 |
+
def parse_transcript_with_ai(text: str, progress=gr.Progress()) -> Dict:
    """Parse transcript text, trying structured parsing before the AI model.

    Despite the name, the regex-based ``TranscriptParser`` runs first; the
    AI model is only used (via ``parse_transcript_with_ai_fallback``) when
    the structured parser raises.

    Args:
        text: Text extracted from the transcript file.
        progress: Progress callback; skipped when falsy.

    Returns:
        The dict produced by ``TranscriptParser.parse_transcript`` on
        success, otherwise whatever ``parse_transcript_with_ai_fallback``
        returns. NOTE(review): the two paths may not share a schema --
        confirm that consumers handle both.
    """
    try:
        # First try structured parsing
        if progress:
            progress(0.1, desc="Attempting structured parsing...")

        parser = TranscriptParser()
        parsed_data = parser.parse_transcript(text)

        if progress:
            progress(0.8, desc="Formatting results...")

        return parsed_data

    except Exception as e:
        # Deliberately broad: any structured-parsing failure should fall
        # through to the AI path. Keep the traceback in the log so the
        # underlying parsing problem can still be diagnosed.
        logging.warning(f"Structured parsing failed, falling back to AI: {str(e)}", exc_info=True)

        # Fall back to AI parsing if structured parsing fails
        return parse_transcript_with_ai_fallback(text, progress)
|
549 |
|
|
|
557 |
- Current grade level
|
558 |
- Weighted GPA (if available)
|
559 |
- Unweighted GPA (if available)
|
560 |
+
- Total credits earned
|
561 |
+
- Community service hours (if available)
|
562 |
- List of all courses with:
|
563 |
* Course code
|
564 |
* Course name
|
|
|
566 |
* Credits earned
|
567 |
* Year/semester taken
|
568 |
* Grade level when taken
|
569 |
+
- Graduation requirements status
|
570 |
Return the data in JSON format.
|
571 |
|
572 |
Transcript Text:
|
|
|
577 |
if progress:
|
578 |
progress(0.1, desc="Processing transcript with AI...")
|
579 |
|
580 |
+
model, tokenizer = model_loader.load_model(progress)
|
581 |
+
if model is None or tokenizer is None:
|
582 |
+
raise gr.Error(f"Model failed to load. {model_loader.error or 'Please try loading a model first.'}")
|
583 |
+
|
584 |
# Tokenize and generate response
|
585 |
inputs = tokenizer(prompt, return_tensors="pt").to(model_loader.device)
|
586 |
if progress:
|
|
|
588 |
|
589 |
outputs = model.generate(
|
590 |
**inputs,
|
591 |
+
max_new_tokens=2000,
|
592 |
temperature=0.1,
|
593 |
do_sample=True
|
594 |
)
|
|
|
617 |
logging.error(f"AI parsing error: {str(e)}")
|
618 |
raise gr.Error(f"Error processing transcript: {str(e)}")
|
619 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
620 |
def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
|
621 |
+
"""Main function to parse transcript files with better error handling"""
|
622 |
try:
|
623 |
if not file_obj:
|
624 |
raise ValueError("Please upload a file first")
|
|
|
626 |
validate_file(file_obj)
|
627 |
file_ext = os.path.splitext(file_obj.name)[1].lower()
|
628 |
|
629 |
+
# Extract text from file with better error reporting
|
630 |
+
if progress:
|
631 |
+
progress(0.2, desc="Extracting text from file...")
|
632 |
+
|
633 |
text = extract_text_from_file(file_obj.name, file_ext)
|
634 |
|
635 |
+
if not text.strip():
|
636 |
+
raise ValueError("No text could be extracted from the file")
|
637 |
+
|
638 |
+
# Use AI for parsing with progress updates
|
639 |
+
if progress:
|
640 |
+
progress(0.4, desc="Analyzing transcript content...")
|
641 |
+
|
642 |
parsed_data = parse_transcript_with_ai(text, progress)
|
643 |
|
644 |
# Format output text
|
645 |
+
if progress:
|
646 |
+
progress(0.9, desc="Generating report...")
|
|
|
|
|
|
|
|
|
|
|
|
|
647 |
|
648 |
+
output_text = format_transcript_output(parsed_data)
|
|
|
|
|
|
|
649 |
|
650 |
+
return output_text, parsed_data
|
651 |
|
652 |
except Exception as e:
|
653 |
+
error_msg = f"Error processing transcript: {str(e)}"
|
654 |
+
logging.error(error_msg)
|
655 |
+
|
656 |
+
# Provide helpful tips based on error type
|
657 |
+
if "No text could be extracted" in str(e):
|
658 |
+
error_msg += "\n\nTips: Please ensure your file is clear and readable. Try scanning at a higher resolution if it's an image."
|
659 |
+
elif "requirements table header" in str(e):
|
660 |
+
error_msg += "\n\nTips: This appears to be an unsupported transcript format. Please contact support."
|
661 |
+
|
662 |
+
return error_msg, None
|
663 |
|
664 |
# ========== LEARNING STYLE QUIZ ==========
|
665 |
class LearningStyleQuiz:
|
|
|
1539 |
"Your profile summary will appear here after saving.",
|
1540 |
label="Profile Summary"
|
1541 |
)
|
1542 |
+
blog = gr.Textbox(label="Personal Blog", visible=False)
|
1543 |
|
1544 |
def save_profile_and_update(name, age, interests, transcript_data, learning_style,
|
1545 |
movie, movie_reason, show, show_reason,
|
|
|
1696 |
|
1697 |
if __name__ == "__main__":
|
1698 |
app.launch()
|
1699 |
+
|