Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -14,58 +14,47 @@ except Exception as e:
|
|
14 |
print(f"Could not load NER model: {e}")
|
15 |
ner_pipeline = None
|
16 |
|
17 |
-
# ==========
|
18 |
-
def extract_gpa(text):
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
r'GPA \(Weighted\)[\s:]*(\d\.\d{1,2})',
|
23 |
-
r'Cumulative GPA \(Weighted\)[\s:]*(\d\.\d{1,2})',
|
24 |
-
r'Unweighted GPA[\s:]*(\d\.\d{1,2})',
|
25 |
-
r'GPA \(Unweighted\)[\s:]*(\d\.\d{1,2})',
|
26 |
-
r'Cumulative GPA \(Unweighted\)[\s:]*(\d\.\d{1,2})',
|
27 |
-
r'GPA[\s:]*(\d\.\d{1,2})'
|
28 |
-
]
|
29 |
-
for pattern in gpa_patterns:
|
30 |
-
for match in re.finditer(pattern, text, re.IGNORECASE):
|
31 |
-
gpa_value = match.group(1)
|
32 |
-
if 'weighted' in pattern.lower():
|
33 |
-
gpa_data['weighted'] = gpa_value
|
34 |
-
elif 'unweighted' in pattern.lower():
|
35 |
-
gpa_data['unweighted'] = gpa_value
|
36 |
-
else:
|
37 |
-
if gpa_data['unweighted'] == "N/A":
|
38 |
-
gpa_data['unweighted'] = gpa_value
|
39 |
-
if gpa_data['weighted'] == "N/A":
|
40 |
-
gpa_data['weighted'] = gpa_value
|
41 |
-
return gpa_data
|
42 |
|
43 |
-
def
|
44 |
-
|
45 |
-
|
46 |
-
r'(
|
47 |
-
r'(
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
def parse_transcript(file):
|
71 |
if file.name.endswith('.pdf'):
|
@@ -74,83 +63,40 @@ def parse_transcript(file):
|
|
74 |
for page in reader.pages:
|
75 |
text += page.extract_text() + '\n'
|
76 |
|
77 |
-
#
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
current_course = {}
|
83 |
-
for entity in entities:
|
84 |
-
if entity['word'].startswith('##'):
|
85 |
-
current_course['name'] = current_course.get('name', '') + entity['word'][2:]
|
86 |
-
elif entity['entity'] in ['B-ORG', 'I-ORG']: # Using ORG as proxy for courses
|
87 |
-
if 'name' in current_course:
|
88 |
-
courses.append(current_course)
|
89 |
-
current_course = {'name': entity['word']}
|
90 |
-
elif entity['entity'] == 'GRADE' and current_course:
|
91 |
-
current_course['grade'] = entity['word']
|
92 |
-
if current_course:
|
93 |
-
courses.append(current_course)
|
94 |
-
except Exception as e:
|
95 |
-
print(f"NER failed: {e}")
|
96 |
|
97 |
-
#
|
98 |
-
|
99 |
-
|
100 |
|
101 |
-
#
|
102 |
-
|
103 |
-
courses_by_grade = defaultdict(list)
|
104 |
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
courses_by_grade[section['grade']].append(course)
|
111 |
-
else:
|
112 |
-
courses_by_grade["All"] = courses
|
113 |
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
|
|
|
|
|
|
118 |
|
119 |
return output_text, {
|
120 |
"gpa": gpa_data,
|
|
|
121 |
"courses": dict(courses_by_grade)
|
122 |
}
|
123 |
-
elif file.name.endswith('.csv'):
|
124 |
-
df = pd.read_csv(file)
|
125 |
-
elif file.name.endswith('.xlsx'):
|
126 |
-
df = pd.read_excel(file)
|
127 |
else:
|
128 |
-
return "Unsupported file format", None
|
129 |
-
|
130 |
-
# Fallback for CSV/Excel
|
131 |
-
gpa = "N/A"
|
132 |
-
for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
|
133 |
-
if col in df.columns:
|
134 |
-
gpa = df[col].iloc[0] if isinstance(df[col].iloc[0], (float, int)) else "N/A"
|
135 |
-
break
|
136 |
-
|
137 |
-
grade_level = "N/A"
|
138 |
-
for col in ['Grade Level', 'Grade', 'Class', 'Year']:
|
139 |
-
if col in df.columns:
|
140 |
-
grade_level = df[col].iloc[0]
|
141 |
-
break
|
142 |
-
|
143 |
-
courses = []
|
144 |
-
for col in ['Course', 'Subject', 'Course Name', 'Class']:
|
145 |
-
if col in df.columns:
|
146 |
-
courses = df[col].tolist()
|
147 |
-
break
|
148 |
-
|
149 |
-
return f"Grade Level: {grade_level}\nGPA: {gpa}", {
|
150 |
-
"gpa": {"unweighted": gpa, "weighted": "N/A"},
|
151 |
-
"grade_level": grade_level,
|
152 |
-
"courses": courses
|
153 |
-
}
|
154 |
|
155 |
# ========== LEARNING STYLE QUIZ ==========
|
156 |
learning_style_questions = [
|
@@ -326,28 +272,14 @@ def transcript_display(transcript_dict):
|
|
326 |
courses_by_grade = transcript_dict["courses"]
|
327 |
|
328 |
if isinstance(courses_by_grade, dict):
|
329 |
-
for grade, courses in courses_by_grade.items():
|
330 |
-
display += f"**{grade}**\n"
|
331 |
for course in courses:
|
332 |
-
|
333 |
-
display += f"- {course.get('name', 'N/A')}"
|
334 |
-
if 'grade' in course:
|
335 |
-
display += f" (Grade: {course['grade']})"
|
336 |
-
if 'term' in course:
|
337 |
-
display += f" | Term: {course['term']}"
|
338 |
-
display += "\n"
|
339 |
-
else:
|
340 |
-
display += f"- {str(course)}\n"
|
341 |
-
display += "\n"
|
342 |
-
elif isinstance(courses_by_grade, list):
|
343 |
-
for course in courses_by_grade:
|
344 |
-
if isinstance(course, dict):
|
345 |
-
display += f"- {course.get('name', 'N/A')}"
|
346 |
if 'grade' in course:
|
347 |
display += f" (Grade: {course['grade']})"
|
348 |
display += "\n"
|
349 |
-
|
350 |
-
display += f"- {str(course)}\n"
|
351 |
|
352 |
if 'gpa' in transcript_dict:
|
353 |
gpa = transcript_dict['gpa']
|
@@ -447,8 +379,8 @@ def generate_response(message, history):
|
|
447 |
with gr.Blocks() as app:
|
448 |
with gr.Tab("Step 1: Upload Transcript"):
|
449 |
gr.Markdown("### Upload your transcript (PDF recommended for best results)")
|
450 |
-
transcript_file = gr.File(label="Transcript file", file_types=[".pdf"
|
451 |
-
transcript_output = gr.Textbox(label="
|
452 |
transcript_data = gr.State()
|
453 |
transcript_file.change(
|
454 |
fn=parse_transcript,
|
@@ -509,5 +441,5 @@ with gr.Blocks() as app:
|
|
509 |
)
|
510 |
|
511 |
if __name__ == "__main__":
|
512 |
-
app.launch()
|
513 |
|
|
|
14 |
print(f"Could not load NER model: {e}")
|
15 |
ner_pipeline = None
|
16 |
|
17 |
+
# ========== TRANSCRIPT PARSING FUNCTIONS ==========
|
18 |
+
def extract_gpa(text, gpa_type):
    """Return the GPA number that follows the ``gpa_type`` label in ``text``.

    Args:
        text: Raw transcript text to search.
        gpa_type: Literal label that precedes the value, e.g.
            ``'Weighted GPA'`` or ``'Un-weighted GPA'``. It is matched
            case-sensitively and as plain text, not as a regex.

    Returns:
        The matched number as a string (e.g. ``"3.85"``), or ``"N/A"`` when
        the label is absent or is not followed by a number.
    """
    pattern = (
        # A label directly preceded by '-' is the tail of a longer label
        # ("Un-Weighted GPA" must not satisfy a search for "Weighted GPA").
        r'(?<!-)'
        # Escape the label so characters like '(' or '.' are taken literally.
        + re.escape(gpa_type)
        # Allow whitespace and/or a colon between label and value.
        + r'[\s:]*'
        # A well-formed decimal only, so trailing sentence punctuation
        # ("3.5.") is not captured as part of the number.
        + r'(\d+(?:\.\d+)?|\.\d+)'
    )
    match = re.search(pattern, text)
    return match.group(1) if match else "N/A"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
+
# One row of the transcript's course table. Compiled once at import time
# instead of on every call.
_COURSE_ROW_RE = re.compile(
    r'(\d{4}-\d{4})\s*'            # School year
    r'\|?\s*(\d+)\s*'              # Grade level
    r'\|?\s*([A-Z0-9]+)\s*'        # Course code
    r'\|?\s*([^\|]+?)\s*'          # Course name (captures until next pipe)
    r'(?:\|\s*[^\|]*){2}'          # Skip Term and DstNumber
    r'\|\s*([A-FW]?)\s*'           # Grade (FG column)
    r'(?:\|\s*[^\|]*)'             # Skip Incl column
    r'\|\s*([\d\.]+|inProgress)'   # Credits
)

# "DE:" as a standalone marker only; the word boundary keeps names such as
# "CODE: ..." or "GRADE: ..." from being rewritten.
_DUAL_ENROLLMENT_RE = re.compile(r'\bDE:')


def extract_courses_from_table(text):
    """Parse course-table rows out of transcript text, grouped by grade level.

    Args:
        text: Raw transcript text containing pipe-delimited course rows.

    Returns:
        A ``defaultdict(list)`` keyed by the grade-level digits exactly as
        they appear in the text (strings such as ``"09"``). Each value is a
        list of dicts with keys ``'name'`` (course code plus course name),
        ``'year'`` (school-year range) and ``'credits'`` (a number string or
        ``'inProgress'``), plus ``'grade'`` when the grade column is not
        empty. The result is empty when no row matches.
    """
    courses_by_grade = defaultdict(list)

    for match in _COURSE_ROW_RE.finditer(text):
        (year_range, grade_level, course_code,
         course_name, grade, credit_value) = match.groups()

        # Expand the dual-enrollment abbreviation in the course name.
        course_name = _DUAL_ENROLLMENT_RE.sub(
            'Dual Enrollment:', course_name.strip()
        )

        course_info = {
            'name': f"{course_code} {course_name}",
            'year': year_range,
            'credits': credit_value,
        }

        # The grade column may be blank (e.g. for courses still in progress).
        grade = grade.strip() if grade else ''
        if grade:
            course_info['grade'] = grade

        courses_by_grade[grade_level].append(course_info)

    return courses_by_grade
|
58 |
|
59 |
def parse_transcript(file):
|
60 |
if file.name.endswith('.pdf'):
|
|
|
63 |
for page in reader.pages:
|
64 |
text += page.extract_text() + '\n'
|
65 |
|
66 |
+
# Extract GPA information
|
67 |
+
gpa_data = {
|
68 |
+
'weighted': extract_gpa(text, 'Weighted GPA'),
|
69 |
+
'unweighted': extract_gpa(text, 'Un-weighted GPA')
|
70 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
+
# Extract current grade level
|
73 |
+
grade_match = re.search(r'Current Grade:\s*(\d+)', text)
|
74 |
+
grade_level = grade_match.group(1) if grade_match else "Unknown"
|
75 |
|
76 |
+
# Extract all courses with grades and year taken
|
77 |
+
courses_by_grade = extract_courses_from_table(text)
|
|
|
78 |
|
79 |
+
# Prepare output text
|
80 |
+
output_text = f"Grade Level: {grade_level}\n"
|
81 |
+
output_text += f"Weighted GPA: {gpa_data['weighted']}\n"
|
82 |
+
output_text += f"Unweighted GPA: {gpa_data['unweighted']}\n\n"
|
83 |
+
output_text += "Course History:\n"
|
|
|
|
|
|
|
84 |
|
85 |
+
for grade, courses in sorted(courses_by_grade.items()):
|
86 |
+
output_text += f"\nGrade {grade}:\n"
|
87 |
+
for course in courses:
|
88 |
+
output_text += f"- {course['name']}"
|
89 |
+
if 'grade' in course:
|
90 |
+
output_text += f" (Grade: {course['grade']})"
|
91 |
+
output_text += "\n"
|
92 |
|
93 |
return output_text, {
|
94 |
"gpa": gpa_data,
|
95 |
+
"grade_level": grade_level,
|
96 |
"courses": dict(courses_by_grade)
|
97 |
}
|
|
|
|
|
|
|
|
|
98 |
else:
|
99 |
+
return "Unsupported file format (PDF only for transcript parsing)", None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
|
101 |
# ========== LEARNING STYLE QUIZ ==========
|
102 |
learning_style_questions = [
|
|
|
272 |
courses_by_grade = transcript_dict["courses"]
|
273 |
|
274 |
if isinstance(courses_by_grade, dict):
|
275 |
+
for grade, courses in sorted(courses_by_grade.items()):
|
276 |
+
display += f"**Grade {grade}**\n"
|
277 |
for course in courses:
|
278 |
+
display += f"- {course['name']}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
279 |
if 'grade' in course:
|
280 |
display += f" (Grade: {course['grade']})"
|
281 |
display += "\n"
|
282 |
+
display += "\n"
|
|
|
283 |
|
284 |
if 'gpa' in transcript_dict:
|
285 |
gpa = transcript_dict['gpa']
|
|
|
379 |
with gr.Blocks() as app:
|
380 |
with gr.Tab("Step 1: Upload Transcript"):
|
381 |
gr.Markdown("### Upload your transcript (PDF recommended for best results)")
|
382 |
+
transcript_file = gr.File(label="Transcript file", file_types=[".pdf"])
|
383 |
+
transcript_output = gr.Textbox(label="Transcript Results", lines=10)
|
384 |
transcript_data = gr.State()
|
385 |
transcript_file.change(
|
386 |
fn=parse_transcript,
|
|
|
441 |
)
|
442 |
|
443 |
if __name__ == "__main__":
|
444 |
+
app.launch()
|
445 |
|