Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -54,37 +54,64 @@ def parse_transcript(file):
|
|
| 54 |
if page_text:
|
| 55 |
text += page_text + '\n'
|
| 56 |
|
| 57 |
-
# GPA extraction
|
| 58 |
-
gpa_match = re.search(r'GPA[:\s]*(\d\.\d{1,2})', text, re.IGNORECASE)
|
| 59 |
-
gpa = gpa_match.group(1) if gpa_match else "N/A"
|
| 60 |
-
|
| 61 |
# Grade level extraction
|
| 62 |
grade_match = re.search(r'(Grade|Year)[\s:]*(\d+|Freshman|Sophomore|Junior|Senior)', text, re.IGNORECASE)
|
| 63 |
grade_level = grade_match.group(2) if grade_match else "Unknown"
|
| 64 |
|
| 65 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
courses_by_grade = extract_courses_with_grade_levels(text)
|
| 67 |
|
| 68 |
-
output_text = f"Grade Level: {grade_level}\
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
for level, courses in courses_by_grade.items():
|
| 70 |
-
output_text += f"
|
| 71 |
for course in courses:
|
| 72 |
output_text += f"- {course['course']}"
|
| 73 |
if 'grade' in course:
|
| 74 |
output_text += f" (Grade: {course['grade']})"
|
| 75 |
output_text += "\n"
|
| 76 |
-
output_text += "\n"
|
| 77 |
|
| 78 |
return output_text, {
|
| 79 |
-
"gpa": gpa,
|
| 80 |
"grade_level": grade_level,
|
| 81 |
"courses": courses_by_grade
|
| 82 |
}
|
| 83 |
-
|
| 84 |
else:
|
| 85 |
return "Unsupported file format", None
|
| 86 |
|
| 87 |
-
# For CSV/XLSX
|
| 88 |
gpa = "N/A"
|
| 89 |
for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
|
| 90 |
if col in df.columns:
|
|
@@ -107,7 +134,7 @@ def parse_transcript(file):
|
|
| 107 |
output_text += "\n".join(f"- {course}" for course in courses)
|
| 108 |
|
| 109 |
return output_text, {
|
| 110 |
-
"gpa": gpa,
|
| 111 |
"grade_level": grade_level,
|
| 112 |
"courses": courses
|
| 113 |
}
|
|
|
|
| 54 |
if page_text:
|
| 55 |
text += page_text + '\n'
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
# Grade level extraction
|
| 58 |
grade_match = re.search(r'(Grade|Year)[\s:]*(\d+|Freshman|Sophomore|Junior|Senior)', text, re.IGNORECASE)
|
| 59 |
grade_level = grade_match.group(2) if grade_match else "Unknown"
|
| 60 |
|
| 61 |
+
# Enhanced GPA extraction
|
| 62 |
+
gpa_data = {'weighted': "N/A", 'unweighted': "N/A"}
|
| 63 |
+
gpa_patterns = [
|
| 64 |
+
r'Weighted GPA[\s:]*(\d\.\d{1,2})',
|
| 65 |
+
r'GPA \(Weighted\)[\s:]*(\d\.\d{1,2})',
|
| 66 |
+
r'Cumulative GPA \(Weighted\)[\s:]*(\d\.\d{1,2})',
|
| 67 |
+
r'Unweighted GPA[\s:]*(\d\.\d{1,2})',
|
| 68 |
+
r'GPA \(Unweighted\)[\s:]*(\d\.\d{1,2})',
|
| 69 |
+
r'Cumulative GPA \(Unweighted\)[\s:]*(\d\.\d{1,2})',
|
| 70 |
+
r'GPA[\s:]*(\d\.\d{1,2})'
|
| 71 |
+
]
|
| 72 |
+
for pattern in gpa_patterns:
|
| 73 |
+
for match in re.finditer(pattern, text, re.IGNORECASE):
|
| 74 |
+
gpa_value = match.group(1)
|
| 75 |
+
if 'weighted' in pattern.lower():
|
| 76 |
+
gpa_data['weighted'] = gpa_value
|
| 77 |
+
elif 'unweighted' in pattern.lower():
|
| 78 |
+
gpa_data['unweighted'] = gpa_value
|
| 79 |
+
else:
|
| 80 |
+
if gpa_data['unweighted'] == "N/A":
|
| 81 |
+
gpa_data['unweighted'] = gpa_value
|
| 82 |
+
if gpa_data['weighted'] == "N/A":
|
| 83 |
+
gpa_data['weighted'] = gpa_value
|
| 84 |
+
|
| 85 |
courses_by_grade = extract_courses_with_grade_levels(text)
|
| 86 |
|
| 87 |
+
output_text = f"Grade Level: {grade_level}\n\n"
|
| 88 |
+
if gpa_data['weighted'] != "N/A" or gpa_data['unweighted'] != "N/A":
|
| 89 |
+
output_text += "GPA Information:\n"
|
| 90 |
+
if gpa_data['unweighted'] != "N/A":
|
| 91 |
+
output_text += f"- Unweighted GPA: {gpa_data['unweighted']}\n"
|
| 92 |
+
if gpa_data['weighted'] != "N/A":
|
| 93 |
+
output_text += f"- Weighted GPA: {gpa_data['weighted']}\n"
|
| 94 |
+
else:
|
| 95 |
+
output_text += "No GPA information found\n"
|
| 96 |
+
|
| 97 |
+
output_text += "\nCourses by Grade Level:\n"
|
| 98 |
for level, courses in courses_by_grade.items():
|
| 99 |
+
output_text += f"\nGrade {level}:\n"
|
| 100 |
for course in courses:
|
| 101 |
output_text += f"- {course['course']}"
|
| 102 |
if 'grade' in course:
|
| 103 |
output_text += f" (Grade: {course['grade']})"
|
| 104 |
output_text += "\n"
|
|
|
|
| 105 |
|
| 106 |
return output_text, {
|
| 107 |
+
"gpa": gpa_data,
|
| 108 |
"grade_level": grade_level,
|
| 109 |
"courses": courses_by_grade
|
| 110 |
}
|
|
|
|
| 111 |
else:
|
| 112 |
return "Unsupported file format", None
|
| 113 |
|
| 114 |
+
# For CSV/XLSX fallback
|
| 115 |
gpa = "N/A"
|
| 116 |
for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
|
| 117 |
if col in df.columns:
|
|
|
|
| 134 |
output_text += "\n".join(f"- {course}" for course in courses)
|
| 135 |
|
| 136 |
return output_text, {
|
| 137 |
+
"gpa": {"unweighted": gpa, "weighted": "N/A"},
|
| 138 |
"grade_level": grade_level,
|
| 139 |
"courses": courses
|
| 140 |
}
|