Spaces:

Dannyar608
/

Final_project

Runtime error

App Files Community

Dannyar608 committed on May 20

Commit

ebc14af

verified ·

1 Parent(s): df3101e

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -36

app.py CHANGED Viewed

@@ -373,23 +373,24 @@ class TranscriptParser:
                 'assessments': {}
             }
-            # Extract student info
-            student_info_match = re.search(r"(\d{7}) - (.*?)\n", text)
             if student_info_match:
                 parsed_data['student_info']['id'] = student_info_match.group(1)
                 parsed_data['student_info']['name'] = student_info_match.group(2).strip()
-            # Extract grade and year info
-            grade_match = re.search(r"Current Grade:\s*(\d+)", text)
-            if grade_match:
-                parsed_data['student_info']['grade'] = grade_match.group(1)
-            yog_match = re.search(r"YOG\s*(\d{4})", text)
-            if yog_match:
-                parsed_data['student_info']['year_of_graduation'] = yog_match.group(1)
-            # Extract GPA information
-            gpa_matches = re.findall(r"(?:Un-weighted|Weighted)\s*GPA\s*([\d.]+)", text)
             if len(gpa_matches) >= 1:
                 parsed_data['student_info']['unweighted_gpa'] = float(gpa_matches[0])
             if len(gpa_matches) >= 2:
@@ -414,7 +415,7 @@ class TranscriptParser:
             if virtual_grade_match:
                 parsed_data['student_info']['virtual_grade'] = virtual_grade_match.group(1)
-            # Extract requirements - specific to this format
             req_section = re.search(r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status(.*?)(?:\n\s*\n|$)", text, re.DOTALL)
             if req_section:
                 req_lines = [line.strip() for line in req_section.group(1).split('\n') if line.strip()]
@@ -456,30 +457,44 @@ class TranscriptParser:
                             status = parts[4]
                             parsed_data['assessments'][name] = status
-            # Extract course history - specific to this format
-            course_section = re.search(r"Requirement\s*School Year\s*GradeLv1\s*CrsNum\s*Description\s*Term\s*DstNumber\s*FG\s*Incl\s*Credits(.*?)(?:\n\s*\n|$)", text, re.DOTALL)
             if course_section:
-                course_lines = [line.strip() for line in course_section.group(1).split('\n') if line.strip()]
                 for line in course_lines:
-                    if '|' in line:
-                        parts = [part.strip() for part in line.split('|')]
-                        if len(parts) >= 9:
-                            course = {
-                                'requirement': parts[0],
-                                'school_year': parts[1],
-                                'grade_level': parts[2],
-                                'course_code': parts[3],
-                                'description': parts[4],
-                                'term': parts[5],
-                                'district_number': parts[6],
-                                'fg': parts[7],
-                                'included': parts[8],
-                                'credits': parts[9] if len(parts) > 9 else "0"
-                            }
-                            # Handle inProgress credits
-                            if "inProgress" in course['credits'].lower():
-                                course['credits'] = "0"
-                            parsed_data['course_history'].append(course)
             return parsed_data

                 'assessments': {}
             }
+            # Extract student info with more robust pattern
+            student_info_match = re.search(
+                r"(\d{7})\s*-\s*(.*?)\s*\n.*?Current Grade:\s*(\d+).*?YOG\s*(\d{4})",
+                text,
+                re.DOTALL
+            )
             if student_info_match:
                 parsed_data['student_info']['id'] = student_info_match.group(1)
                 parsed_data['student_info']['name'] = student_info_match.group(2).strip()
+                parsed_data['student_info']['grade'] = student_info_match.group(3)
+                parsed_data['student_info']['year_of_graduation'] = student_info_match.group(4)
+            # More robust GPA extraction
+            gpa_matches = re.findall(
+                r"(?:Un.?weighted|Weighted)\s*GPA\s*([\d.]+)",
+                text,
+                re.IGNORECASE
+            )
             if len(gpa_matches) >= 1:
                 parsed_data['student_info']['unweighted_gpa'] = float(gpa_matches[0])
             if len(gpa_matches) >= 2:
             if virtual_grade_match:
                 parsed_data['student_info']['virtual_grade'] = virtual_grade_match.group(1)
+            # Extract requirements
             req_section = re.search(r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status(.*?)(?:\n\s*\n|$)", text, re.DOTALL)
             if req_section:
                 req_lines = [line.strip() for line in req_section.group(1).split('\n') if line.strip()]
                             status = parts[4]
                             parsed_data['assessments'][name] = status
+            # Extract course history with more fault-tolerant parsing
+            course_section = re.search(
+                r"Requirement.*?School Year.*?GradeLv1.*?CrsNum.*?Description.*?Term.*?DstNumber.*?FG.*?Incl.*?Credits(.*?)(?:Legend|\Z)",
+                text,
+                re.DOTALL | re.IGNORECASE
+            )
             if course_section:
+                course_lines = [
+                    line.strip() for line in course_section.group(1).split('\n')
+                    if line.strip() and '|' in line
+                ]
                 for line in course_lines:
+                    parts = [part.strip() for part in line.split('|')]
+                    # Handle varying number of columns
+                    if len(parts) >= 9:
+                        course = {
+                            'requirement': parts[0] if len(parts) > 0 else "",
+                            'school_year': parts[1] if len(parts) > 1 else "",
+                            'grade_level': parts[2] if len(parts) > 2 else "",
+                            'course_code': parts[3] if len(parts) > 3 else "",
+                            'description': parts[4] if len(parts) > 4 else "",
+                            'term': parts[5] if len(parts) > 5 else "",
+                            'district_number': parts[6] if len(parts) > 6 else "",
+                            'fg': parts[7] if len(parts) > 7 else "",
+                            'included': parts[8] if len(parts) > 8 else "",
+                            'credits': parts[9] if len(parts) > 9 else "0"
+                        }
+                        # Handle "inProgress" and empty credits
+                        if "inProgress" in course['credits'].lower() or not course['credits']:
+                            course['credits'] = "0"
+                        elif not course['credits'].replace('.','').isdigit():
+                            course['credits'] = "0"
+                        parsed_data['course_history'].append(course)
             return parsed_data