Spaces:

Dannyar608
/

Final_project

Runtime error

App Files Community

Dannyar608 committed on May 16

Commit

929de97

verified ·

1 Parent(s): c3e05e0

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -17

app.py CHANGED Viewed

@@ -282,7 +282,7 @@ def remove_sensitive_info(text: str) -> str:
     # Remove student IDs (assuming 6-9 digit numbers)
     text = re.sub(r'\b\d{6,9}\b', '[ID]', text)
     # Remove email addresses
-    text = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', '[EMAIL]', text)
     return text
 # ========== TRANSCRIPT PARSING ==========
@@ -322,7 +322,7 @@ class TranscriptParser:
     def detect_format(self, text: str) -> str:
         """Detect the transcript format"""
         # Check for Miami-Dade specific patterns
-        if re.search(r'MIAMI-DADE SCHOOL DISTRICT', text, re.IGNORECASE):
             return 'miami_dade'
         # Check for homeschool patterns
         elif re.search(r'homeschool|home education|parent signature', text, re.IGNORECASE):
@@ -334,35 +334,35 @@ class TranscriptParser:
         """Parse Miami-Dade formatted transcripts with enhanced error handling"""
         try:
             # Extract student info with more robust patterns
-            student_info = re.search(
                 r"(\d{7})\s*-\s*([A-Z]+,\s*[A-Z]+).*?Current Grade:\s*(\d+)\s*YOG\s*(\d{4})"
                 r".*?Un-weighted GPA\s*([\d.]+).*?Weighted GPA\s*([\d.]+)"
                 r".*?Total Credits Earned\s*([\d.]+).*?Comm Serv Hours\s*(\d+)",
                 text, re.DOTALL
             )
-            if student_info:
                 self.student_data = {
-                    "id": student_info.group(1),
-                    "name": student_info.group(2).replace(",", ", "),
-                    "current_grade": student_info.group(3),
-                    "graduation_year": student_info.group(4),
-                    "unweighted_gpa": float(student_info.group(5)),
-                    "weighted_gpa": float(student_info.group(6)),
-                    "total_credits": float(student_info.group(7)),
-                    "community_service_hours": int(student_info.group(8))
                 }
             # Extract requirements with better table parsing
-            req_table = re.search(
                 r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status(.*?)Total\s*[\d.]+\s*[\d.]+\s*[\d.]+\s*[\d.]+%",
                 text, re.DOTALL
             )
-            if req_table:
                 req_matches = re.finditer(
                     r"([A-Z]-[\w\s\(\)&]+)\s*([^\n]+?)\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)%",
-                    req_table.group(1)
                 )
                 for match in req_matches:
@@ -376,7 +376,10 @@ class TranscriptParser:
                     }
             # Extract course history with more flexible parsing
-            course_section = re.search(r"Requirement\s*School Year\s*GradeLv1\s*CrsNu m\s*Description(.*?)(?=Legend for Incl:|$)", text, re.DOTALL)
             if course_section:
                 course_matches = re.finditer(
@@ -1893,5 +1896,4 @@ app = create_interface()
 if __name__ == "__main__":
     app.launch()

     # Remove student IDs (assuming 6-9 digit numbers)
     text = re.sub(r'\b\d{6,9}\b', '[ID]', text)
     # Remove email addresses
+    text = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z9.-]+\.[A-Z|a-z]{2,}\b', '[EMAIL]', text)
     return text
 # ========== TRANSCRIPT PARSING ==========
     def detect_format(self, text: str) -> str:
         """Detect the transcript format"""
         # Check for Miami-Dade specific patterns
+        if re.search(r'MIAMI-DADE (COUNTY|COUNTRY) PUBLIC SCHOOLS', text, re.IGNORECASE):
             return 'miami_dade'
         # Check for homeschool patterns
         elif re.search(r'homeschool|home education|parent signature', text, re.IGNORECASE):
         """Parse Miami-Dade formatted transcripts with enhanced error handling"""
         try:
             # Extract student info with more robust patterns
+            student_match = re.search(
                 r"(\d{7})\s*-\s*([A-Z]+,\s*[A-Z]+).*?Current Grade:\s*(\d+)\s*YOG\s*(\d{4})"
                 r".*?Un-weighted GPA\s*([\d.]+).*?Weighted GPA\s*([\d.]+)"
                 r".*?Total Credits Earned\s*([\d.]+).*?Comm Serv Hours\s*(\d+)",
                 text, re.DOTALL
             )
+            if student_match:
                 self.student_data = {
+                    "id": student_match.group(1),
+                    "name": student_match.group(2).replace(",", ", "),
+                    "current_grade": student_match.group(3),
+                    "graduation_year": student_match.group(4),
+                    "unweighted_gpa": float(student_match.group(5)),
+                    "weighted_gpa": float(student_match.group(6)),
+                    "total_credits": float(student_match.group(7)),
+                    "community_service_hours": int(student_match.group(8))
                 }
             # Extract requirements with better table parsing
+            req_section = re.search(
                 r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status(.*?)Total\s*[\d.]+\s*[\d.]+\s*[\d.]+\s*[\d.]+%",
                 text, re.DOTALL
             )
+            if req_section:
                 req_matches = re.finditer(
                     r"([A-Z]-[\w\s\(\)&]+)\s*([^\n]+?)\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)%",
+                    req_section.group(1)
                 )
                 for match in req_matches:
                     }
             # Extract course history with more flexible parsing
+            course_section = re.search(
+                r"Requirement\s*School Year\s*GradeLv1\s*CrsNu m\s*Description(.*?)(?=Legend for Incl:|$)",
+                text, re.DOTALL
+            )
             if course_section:
                 course_matches = re.finditer(
 if __name__ == "__main__":
     app.launch()