Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -173,7 +173,7 @@ def validate_age(age: Union[int, float, str]) -> int:
|
|
173 |
def validate_file(file_obj) -> None:
|
174 |
"""Validate uploaded file."""
|
175 |
if not file_obj:
|
176 |
-
raise ValueError("Please upload a file first
|
177 |
|
178 |
file_ext = os.path.splitext(file_obj.name)[1].lower()
|
179 |
if file_ext not in ALLOWED_FILE_TYPES:
|
@@ -319,149 +319,122 @@ class TranscriptParser:
|
|
319 |
raise ValueError(f"Couldn't parse transcript: {str(e)}")
|
320 |
|
321 |
def _parse_miami_dade_format(self, text: str) -> Dict:
|
322 |
-
"""
|
323 |
-
#
|
324 |
-
|
325 |
-
|
326 |
-
#
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
r'(?:Total\s*Credits)[:]?\s*([\d.]+).*?'
|
334 |
-
r'(?:Comm\s*Serv|Service\s*Hours)[:]?\s*(\d+)',
|
335 |
-
text, re.IGNORECASE | re.DOTALL
|
336 |
-
)
|
337 |
-
|
338 |
-
if student_match:
|
339 |
-
self.student_data = {
|
340 |
-
"id": student_match.group(1).strip(),
|
341 |
-
"name": student_match.group(2).replace(",", ", ").strip().title(),
|
342 |
-
"current_grade": student_match.group(3),
|
343 |
-
"graduation_year": student_match.group(4),
|
344 |
-
"unweighted_gpa": float(student_match.group(5)),
|
345 |
-
"weighted_gpa": float(student_match.group(6)),
|
346 |
-
"total_credits": float(student_match.group(7)),
|
347 |
-
"community_service_hours": int(student_match.group(8))
|
348 |
-
}
|
349 |
-
else:
|
350 |
-
# Fallback pattern if first one fails
|
351 |
-
student_match = re.search(
|
352 |
-
r'(\d{7})\s*(.*?)\s*(?:Grade|Grd)[:]?\s*(\d+)',
|
353 |
-
text, re.IGNORECASE
|
354 |
-
)
|
355 |
-
if student_match:
|
356 |
-
self.student_data = {
|
357 |
-
"id": student_match.group(1).strip(),
|
358 |
-
"name": student_match.group(2).strip().title(),
|
359 |
-
"current_grade": student_match.group(3),
|
360 |
-
"graduation_year": "",
|
361 |
-
"unweighted_gpa": 0.0,
|
362 |
-
"weighted_gpa": 0.0,
|
363 |
-
"total_credits": 0.0,
|
364 |
-
"community_service_hours": 0
|
365 |
-
}
|
366 |
-
|
367 |
-
# Extract requirements
|
368 |
-
self.requirements = {}
|
369 |
-
req_section = re.search(
|
370 |
-
r"Code\s+Description\s+Required\s+Waived\s+Completed\s+Status(.*?)Total\s+\d+\.\d+\s+\d+\.\d+\s+\d+\.\d+\s+\d+%",
|
371 |
-
text, re.DOTALL
|
372 |
-
)
|
373 |
-
if req_section:
|
374 |
-
req_lines = req_section.group(1).strip().split('\n')
|
375 |
-
for line in req_lines:
|
376 |
-
line = line.strip()
|
377 |
-
if not line:
|
378 |
-
continue
|
379 |
-
|
380 |
-
req_match = re.match(r"([A-Z]-[^\s]+)\s+(.+?)\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+)%", line)
|
381 |
-
if req_match:
|
382 |
-
code = req_match.group(1).strip()
|
383 |
-
self.requirements[code] = {
|
384 |
-
"description": req_match.group(2).strip(),
|
385 |
-
"required": float(req_match.group(3)),
|
386 |
-
"waived": float(req_match.group(4)),
|
387 |
-
"completed": float(req_match.group(5)),
|
388 |
-
"status": f"{req_match.group(6)}%"
|
389 |
-
}
|
390 |
-
|
391 |
-
# Extract course history
|
392 |
-
self.course_history = []
|
393 |
-
course_section = re.search(
|
394 |
-
r"Requirement\s+School Year\s+GradeLv1\s+CrsNum\s+Description\s+Term\s+DstNumber\s+FG\s+Incl\s+Credits(.*?)Legend for Incl",
|
395 |
-
text, re.DOTALL
|
396 |
-
)
|
397 |
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
404 |
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
#
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
"current_courses": self.current_courses,
|
433 |
-
"course_history": self.course_history,
|
434 |
-
"graduation_status": self.graduation_status,
|
435 |
-
"format": "miami_dade"
|
436 |
}
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
self.current_courses = [
|
441 |
-
{
|
442 |
-
"course": c["description"],
|
443 |
-
"code": c["course_code"],
|
444 |
-
"category": c["requirement_category"],
|
445 |
-
"term": c["term"],
|
446 |
-
"credits": c["credits"],
|
447 |
-
"grade_level": c["grade_level"]
|
448 |
-
}
|
449 |
-
for c in self.course_history
|
450 |
-
if c.get("credits") and isinstance(c["credits"], str) and c["credits"].lower() == "inprogress"
|
451 |
-
]
|
452 |
-
|
453 |
-
def _calculate_completion(self):
|
454 |
-
"""Compute graduation readiness."""
|
455 |
-
total_required = sum(req["required"] for req in self.requirements.values())
|
456 |
-
total_completed = sum(req["completed"] for req in self.requirements.values())
|
457 |
-
|
458 |
-
self.graduation_status.update({
|
459 |
-
"total_required_credits": total_required,
|
460 |
-
"total_completed_credits": total_completed,
|
461 |
-
"percent_complete": round((total_completed / total_required) * 100, 1) if total_required > 0 else 0,
|
462 |
-
"remaining_credits": total_required - total_completed,
|
463 |
-
"on_track": (total_completed / total_required) >= 0.75 if total_required > 0 else False
|
464 |
-
})
|
465 |
|
466 |
def format_transcript_output(data: Dict) -> str:
|
467 |
"""Enhanced formatting for transcript output with format awareness"""
|
@@ -500,24 +473,25 @@ def format_transcript_output(data: Dict) -> str:
|
|
500 |
|
501 |
# Detailed Requirements
|
502 |
output.append("### Detailed Requirements:")
|
503 |
-
for
|
504 |
output.append(
|
505 |
-
f"- **{code}**: {req
|
506 |
f" Required: {req['required']} | Completed: {req['completed']} | "
|
507 |
f"Status: {req['status']}"
|
508 |
)
|
509 |
output.append("")
|
510 |
|
511 |
# Current Courses
|
512 |
-
if data.get("
|
513 |
output.append("## Current Courses (In Progress)\n" + '='*50)
|
514 |
-
for course in data["
|
515 |
-
|
516 |
-
|
517 |
-
|
518 |
-
|
519 |
-
|
520 |
-
|
|
|
521 |
output.append("")
|
522 |
|
523 |
# Course History by Year
|
@@ -532,9 +506,9 @@ def format_transcript_output(data: Dict) -> str:
|
|
532 |
output.append(f"\n### {year}")
|
533 |
for course in courses_by_year[year]:
|
534 |
output.append(
|
535 |
-
f"- **{course.get('
|
536 |
-
f" Subject: {course.get('
|
537 |
-
f"Grade: {course.get('
|
538 |
f"Credits: {course.get('credits', 'N/A')}"
|
539 |
)
|
540 |
|
|
|
173 |
def validate_file(file_obj) -> None:
|
174 |
"""Validate uploaded file."""
|
175 |
if not file_obj:
|
176 |
+
raise ValueError("Please upload a file first")
|
177 |
|
178 |
file_ext = os.path.splitext(file_obj.name)[1].lower()
|
179 |
if file_ext not in ALLOWED_FILE_TYPES:
|
|
|
319 |
raise ValueError(f"Couldn't parse transcript: {str(e)}")
|
320 |
|
321 |
def _parse_miami_dade_format(self, text: str) -> Dict:
|
322 |
+
"""Parse Miami-Dade County Public Schools transcripts."""
|
323 |
+
# Initialize PDF reader from text (simulating the PDF structure)
|
324 |
+
lines = [line.strip() for line in text.split('\n') if line.strip()]
|
325 |
+
|
326 |
+
# Initialize data structure
|
327 |
+
data = {
|
328 |
+
'student_info': {},
|
329 |
+
'graduation_requirements': [],
|
330 |
+
'course_history': [],
|
331 |
+
'summary': {}
|
332 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
333 |
|
334 |
+
# Parse student information
|
335 |
+
student_info_lines = []
|
336 |
+
for line in lines:
|
337 |
+
if "DORAL ACADEMY HIGH SCHOOL" in line:
|
338 |
+
student_info_lines = lines[lines.index(line):lines.index(line)+5]
|
339 |
+
break
|
340 |
+
|
341 |
+
if student_info_lines:
|
342 |
+
# Parse school and cohort info
|
343 |
+
school_info = student_info_lines[0].split('|')
|
344 |
+
data['student_info']['school'] = school_info[1].strip()
|
345 |
+
data['student_info']['district'] = school_info[2].strip()
|
346 |
+
|
347 |
+
# Parse student name and ID
|
348 |
+
name_id_line = student_info_lines[1].split('-')
|
349 |
+
data['student_info']['student_id'] = name_id_line[0].strip()
|
350 |
+
data['student_info']['student_name'] = name_id_line[1].split(',')[1].strip() + " " + name_id_line[1].split(',')[0].strip()
|
351 |
+
|
352 |
+
# Parse academic info
|
353 |
+
academic_info = student_info_lines[2].split('|')
|
354 |
+
data['student_info']['current_grade'] = academic_info[1].split(':')[1].strip()
|
355 |
+
data['student_info']['graduation_year'] = academic_info[2].strip()
|
356 |
+
data['student_info']['weighted_gpa'] = academic_info[3].split(':')[1].strip()
|
357 |
+
data['student_info']['community_service_date'] = academic_info[4].split(':')[1].strip()
|
358 |
+
data['student_info']['total_credits_earned'] = academic_info[5].split(':')[1].strip()
|
359 |
+
|
360 |
+
# Parse graduation requirements
|
361 |
+
requirements_start = None
|
362 |
+
requirements_end = None
|
363 |
+
for i, line in enumerate(lines):
|
364 |
+
if "Code" in line and "Description" in line and "Required" in line:
|
365 |
+
requirements_start = i + 1
|
366 |
+
if requirements_start and "Total" in line:
|
367 |
+
requirements_end = i
|
368 |
+
break
|
369 |
+
|
370 |
+
if requirements_start and requirements_end:
|
371 |
+
for line in lines[requirements_start:requirements_end]:
|
372 |
+
if '|' in line:
|
373 |
+
parts = [p.strip() for p in line.split('|') if p.strip()]
|
374 |
+
if len(parts) >= 6:
|
375 |
+
req = {
|
376 |
+
'code': parts[0],
|
377 |
+
'description': parts[1],
|
378 |
+
'required': parts[2],
|
379 |
+
'waived': parts[3],
|
380 |
+
'completed': parts[4],
|
381 |
+
'status': parts[5]
|
382 |
+
}
|
383 |
+
data['graduation_requirements'].append(req)
|
384 |
+
|
385 |
+
# Parse total line
|
386 |
+
total_line = lines[requirements_end]
|
387 |
+
total_parts = [p.strip() for p in total_line.split('|') if p.strip()]
|
388 |
+
if len(total_parts) >= 5:
|
389 |
+
data['summary']['total_required'] = total_parts[1]
|
390 |
+
data['summary']['total_waived'] = total_parts[2]
|
391 |
+
data['summary']['total_completed'] = total_parts[3]
|
392 |
+
data['summary']['completion_percentage'] = total_parts[4]
|
393 |
+
|
394 |
+
# Parse course history
|
395 |
+
course_history_start = None
|
396 |
+
for i, line in enumerate(lines):
|
397 |
+
if "Requirement" in line and "School Year" in line and "GradeLv1" in line:
|
398 |
+
course_history_start = i + 1
|
399 |
+
break
|
400 |
+
|
401 |
+
if course_history_start:
|
402 |
+
current_requirement = None
|
403 |
+
for line in lines[course_history_start:]:
|
404 |
+
if '|' in line:
|
405 |
+
parts = [p.strip() for p in line.split('|') if p.strip()]
|
406 |
|
407 |
+
# Check if this is a new requirement line
|
408 |
+
if len(parts) >= 2 and parts[0] and parts[0] in [req['code'] for req in data['graduation_requirements']]:
|
409 |
+
current_requirement = parts[0]
|
410 |
+
parts = parts[1:] # Remove the requirement code
|
411 |
+
|
412 |
+
if len(parts) >= 9:
|
413 |
+
course = {
|
414 |
+
'requirement': current_requirement,
|
415 |
+
'school_year': parts[0],
|
416 |
+
'grade_level': parts[1],
|
417 |
+
'course_number': parts[2],
|
418 |
+
'description': parts[3],
|
419 |
+
'term': parts[4],
|
420 |
+
'district_number': parts[5],
|
421 |
+
'fg': parts[6],
|
422 |
+
'included': parts[7],
|
423 |
+
'credits': parts[8]
|
424 |
+
}
|
425 |
+
data['course_history'].append(course)
|
426 |
+
|
427 |
+
# Calculate graduation status
|
428 |
+
graduation_status = {
|
429 |
+
'total_required_credits': float(data['summary']['total_required']),
|
430 |
+
'total_completed_credits': float(data['summary']['total_completed']),
|
431 |
+
'percent_complete': float(data['summary']['completion_percentage'].replace('%', '')),
|
432 |
+
'remaining_credits': float(data['summary']['total_required']) - float(data['summary']['total_completed']),
|
433 |
+
'on_track': float(data['summary']['completion_percentage'].replace('%', '')) >= 75.0
|
|
|
|
|
|
|
|
|
434 |
}
|
435 |
+
data['graduation_status'] = graduation_status
|
436 |
+
|
437 |
+
return data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
438 |
|
439 |
def format_transcript_output(data: Dict) -> str:
|
440 |
"""Enhanced formatting for transcript output with format awareness"""
|
|
|
473 |
|
474 |
# Detailed Requirements
|
475 |
output.append("### Detailed Requirements:")
|
476 |
+
for req in data.get("graduation_requirements", []):
|
477 |
output.append(
|
478 |
+
f"- **{req['code']}**: {req['description']}\n"
|
479 |
f" Required: {req['required']} | Completed: {req['completed']} | "
|
480 |
f"Status: {req['status']}"
|
481 |
)
|
482 |
output.append("")
|
483 |
|
484 |
# Current Courses
|
485 |
+
if any(c.get('credits', '') == 'inProgress' for c in data.get("course_history", [])):
|
486 |
output.append("## Current Courses (In Progress)\n" + '='*50)
|
487 |
+
for course in data["course_history"]:
|
488 |
+
if course.get('credits', '') == 'inProgress':
|
489 |
+
output.append(
|
490 |
+
f"- **{course['course_number']} {course['description']}**\n"
|
491 |
+
f" Category: {course['requirement']} | "
|
492 |
+
f"Grade Level: {course['grade_level']} | "
|
493 |
+
f"Term: {course['term']} | Credits: {course['credits']}"
|
494 |
+
)
|
495 |
output.append("")
|
496 |
|
497 |
# Course History by Year
|
|
|
506 |
output.append(f"\n### {year}")
|
507 |
for course in courses_by_year[year]:
|
508 |
output.append(
|
509 |
+
f"- **{course.get('course_number', '')} {course.get('description', 'Unnamed course')}**\n"
|
510 |
+
f" Subject: {course.get('requirement', 'N/A')} | "
|
511 |
+
f"Grade: {course.get('fg', 'N/A')} | "
|
512 |
f"Credits: {course.get('credits', 'N/A')}"
|
513 |
)
|
514 |
|