Spaces:

Dannyar608
/

Final_project

Runtime error

App Files Files Community

Dannyar608 committed on May 18

Commit

e96bacb

verified ·

1 Parent(s): 8df5e79

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -36

app.py CHANGED Viewed

@@ -180,7 +180,15 @@ def extract_text_from_file(file_path: str, file_ext: str) -> str:
                 import pdfplumber
                 with pdfplumber.open(file_path) as pdf:
                     for page in pdf.pages:
-                        text += page.extract_text() + '\n'
                 if not text.strip():
                     raise ValueError("PDFPlumber returned empty text")
             except Exception as e:
@@ -311,10 +319,10 @@ class TranscriptParser:
         except Exception as e:
             logging.error(f"Error parsing transcript: {str(e)}")
-            raise ValueError(f"Couldn't parse transcript: {str(e)}")
     def _parse_detailed_transcript(self, text: str) -> Optional[Dict]:
-        """Parse detailed transcript format"""
         try:
             parsed_data = {
                 'student_info': {},
@@ -323,46 +331,48 @@ class TranscriptParser:
                 'assessments': {}
             }
-            # Extract student info
             student_info_match = re.search(r"(\d{7}) - (.*?)\n", text)
             if student_info_match:
                 parsed_data['student_info']['id'] = student_info_match.group(1)
                 parsed_data['student_info']['name'] = student_info_match.group(2).strip()
-            current_grade_match = re.search(r"Current Grade: (\d+)", text)
             if current_grade_match:
                 parsed_data['student_info']['grade'] = current_grade_match.group(1)
-            yog_match = re.search(r"YOG (\d{4})", text)
             if yog_match:
                 parsed_data['student_info']['year_of_graduation'] = yog_match.group(1)
-            unweighted_gpa_match = re.search(r"Un-weighted GPA (\d+\.\d+)", text)
-            if unweighted_gpa_match:
-                parsed_data['student_info']['unweighted_gpa'] = float(unweighted_gpa_match.group(1))
-            weighted_gpa_match = re.search(r"Weighted GPA (\d+\.\d+)", text)
-            if weighted_gpa_match:
-                parsed_data['student_info']['weighted_gpa'] = float(weighted_gpa_match.group(1))
-            service_hours_match = re.search(r"Comm Serv Hours (\d+)", text)
             if service_hours_match:
                 parsed_data['student_info']['community_service_hours'] = int(service_hours_match.group(1))
-            service_date_match = re.search(r"Comm Serv Date (\d{2}/\d{2}/\d{4})", text)
             if service_date_match:
                 parsed_data['student_info']['community_service_date'] = service_date_match.group(1)
-            credits_match = re.search(r"Total Credits Earned (\d+\.\d+)", text)
             if credits_match:
                 parsed_data['student_info']['total_credits'] = float(credits_match.group(1))
-            virtual_grade_match = re.search(r"Virtual Grade (\w+)", text)
             if virtual_grade_match:
                 parsed_data['student_info']['virtual_grade'] = virtual_grade_match.group(1)
-            # Extract requirements
-            req_pattern = re.compile(r"([A-Z]-.*?)\s*\|\s*(.*?)\s*\|\s*(\d+\.\d+)\s*\|\s*(\d+\.\d+)\s*\|\s*(\d+\.\d+)\s*\|\s*(\d+) %")
             for match in req_pattern.finditer(text):
                 code = match.group(1).strip()
                 desc = match.group(2).strip()
@@ -378,33 +388,45 @@ class TranscriptParser:
                     "percent_complete": percent
                 }
-            # Extract assessments
-            assess_pattern = re.compile(r"Z-Assessment: (.*?)\s*\|\s*(.*?)\s*\|\s*(\w+)\s*\|\s*(\d+) %")
             for match in assess_pattern.finditer(text):
-                name = f"Assessment: {match.group(1)}"
-                status = match.group(3)
                 parsed_data['assessments'][name] = status
             for z_item in ["Community Service Hours", "GPA"]:
-                if re.search(fr"Z-{z_item.replace(' ', '.*?')}\s*\|\s*(.*?)\s*\|\s*(\w+)\s*\|\s*(\d+) %", text):
-                    status = re.search(fr"Z-{z_item.replace(' ', '.*?')}\s*\|\s*(.*?)\s*\|\s*(\w+)\s*\|\s*(\d+) %", text).group(2)
                     parsed_data['assessments'][z_item] = status
-            # Extract courses (simplified for now - can be enhanced)
-            course_pattern = r'([A-Z]{2,4}\s?\d{3})\s+(.*?)\s+([A-F][+-]?)\s+([0-9.]+)'
-            courses = re.findall(course_pattern, text)
-            for course in courses:
-                parsed_data['course_history'].append({
-                    'course_code': course[0],
-                    'description': course[1],
-                    'grade': course[2],
-                    'credits': float(course[3])
-                })
             return parsed_data
         except Exception as e:
-            logging.warning(f"Detailed transcript parsing failed, falling back to simple parser: {str(e)}")
             return None
     def _parse_simplified_transcript(self, text: str) -> Dict:
@@ -676,6 +698,10 @@ def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Di
         logging.error(error_msg)
         raise gr.Error(f"{error_msg}\n\nPossible solutions:\n1. Try a different file format\n2. Ensure text is clear and not handwritten\n3. Check file size (<5MB)")
 # ========== LEARNING STYLE QUIZ ==========
 class LearningStyleQuiz:
     def __init__(self):

                 import pdfplumber
                 with pdfplumber.open(file_path) as pdf:
                     for page in pdf.pages:
+                        # Try tables first
+                        tables = page.extract_tables()
+                        if tables:
+                            for table in tables:
+                                text += "\n".join([" | ".join(str(cell) for cell in row if cell is not None]) + "\n"
+                        # Fall back to text extraction
+                        page_text = page.extract_text()
+                        if page_text:
+                            text += page_text + "\n"
                 if not text.strip():
                     raise ValueError("PDFPlumber returned empty text")
             except Exception as e:
         except Exception as e:
             logging.error(f"Error parsing transcript: {str(e)}")
+            raise ValueError(f"Couldn't parse transcript content. Error: {str(e)}")
     def _parse_detailed_transcript(self, text: str) -> Optional[Dict]:
+        """Parse detailed transcript format with improved patterns"""
         try:
             parsed_data = {
                 'student_info': {},
                 'assessments': {}
             }
+            # Extract student info with more flexible patterns
             student_info_match = re.search(r"(\d{7}) - (.*?)\n", text)
             if student_info_match:
                 parsed_data['student_info']['id'] = student_info_match.group(1)
                 parsed_data['student_info']['name'] = student_info_match.group(2).strip()
+            # More flexible grade and year extraction
+            current_grade_match = re.search(r"Current Grade:\s*(\d+)", text, re.IGNORECASE)
             if current_grade_match:
                 parsed_data['student_info']['grade'] = current_grade_match.group(1)
+            yog_match = re.search(r"YOG\s*(\d{4})", text, re.IGNORECASE)
             if yog_match:
                 parsed_data['student_info']['year_of_graduation'] = yog_match.group(1)
+            # Improved GPA extraction
+            gpa_matches = re.findall(r"(?:Un-weighted|Weighted)\s*GPA\s*([\d.]+)", text, re.IGNORECASE)
+            if len(gpa_matches) >= 2:
+                parsed_data['student_info']['unweighted_gpa'] = float(gpa_matches[0])
+                parsed_data['student_info']['weighted_gpa'] = float(gpa_matches[1])
+            # Community service info
+            service_hours_match = re.search(r"Comm\s*Serv\s*Hours\s*(\d+)", text, re.IGNORECASE)
             if service_hours_match:
                 parsed_data['student_info']['community_service_hours'] = int(service_hours_match.group(1))
+            service_date_match = re.search(r"Comm\s*Serv\s*Date\s*(\d{2}/\d{2}/\d{4})", text, re.IGNORECASE)
             if service_date_match:
                 parsed_data['student_info']['community_service_date'] = service_date_match.group(1)
+            # Credits info
+            credits_match = re.search(r"Total\s*Credits\s*Earned\s*([\d.]+)", text, re.IGNORECASE)
             if credits_match:
                 parsed_data['student_info']['total_credits'] = float(credits_match.group(1))
+            # Virtual grade
+            virtual_grade_match = re.search(r"Virtual\s*Grade\s*(\w+)", text, re.IGNORECASE)
             if virtual_grade_match:
                 parsed_data['student_info']['virtual_grade'] = virtual_grade_match.group(1)
+            # Extract requirements with improved pattern
+            req_pattern = re.compile(r"([A-Z]-[^\|]+)\s*\|\s*([^\|]+)\s*\|\s*([\d.]+)\s*\|\s*([\d.]+)\s*\|\s*([\d.]+)\s*\|\s*([\d]+)\s*%")
             for match in req_pattern.finditer(text):
                 code = match.group(1).strip()
                 desc = match.group(2).strip()
                     "percent_complete": percent
                 }
+            # Extract assessments with more flexible pattern
+            assess_pattern = re.compile(r"Z-Assessment:\s*(.*?)\s*\|\s*(.*?)\s*\|\s*(\w+)\s*\|\s*(\d+)\s*%")
             for match in assess_pattern.finditer(text):
+                name = f"Assessment: {match.group(1).strip()}"
+                status = match.group(3).strip()
                 parsed_data['assessments'][name] = status
+            # Handle other Z items
             for z_item in ["Community Service Hours", "GPA"]:
+                z_match = re.search(fr"Z-{z_item.replace(' ', '.*?')}\s*\|\s*(.*?)\s*\|\s*(\w+)\s*\|\s*(\d+)\s*%", text, re.IGNORECASE)
+                if z_match:
+                    status = z_match.group(2).strip()
                     parsed_data['assessments'][z_item] = status
+            # Extract course history with more robust pattern
+            course_history_section = re.search(r"Requirement.*?School Year.*?GradeLv1.*?CrsNum.*?Description.*?Term.*?DstNumber.*?FG.*?Incl.*?Credits(.*?)(?:\n\s*\n|$)", text, re.DOTALL)
+            if course_history_section:
+                course_lines = [line.strip() for line in course_history_section.group(1).split('\n') if line.strip()]
+                for line in course_lines:
+                    parts = [part.strip() for part in line.split('|')]
+                    if len(parts) >= 9:
+                        course = {
+                            'requirement': parts[0],
+                            'school_year': parts[1],
+                            'grade_level': parts[2],
+                            'course_code': parts[3],
+                            'description': parts[4],
+                            'term': parts[5],
+                            'district_number': parts[6],
+                            'fg': parts[7],
+                            'included': parts[8],
+                            'credits': parts[9] if len(parts) > 9 else "0"
+                        }
+                        parsed_data['course_history'].append(course)
             return parsed_data
         except Exception as e:
+            logging.warning(f"Detailed transcript parsing failed: {str(e)}")
             return None
     def _parse_simplified_transcript(self, text: str) -> Dict:
         logging.error(error_msg)
         raise gr.Error(f"{error_msg}\n\nPossible solutions:\n1. Try a different file format\n2. Ensure text is clear and not handwritten\n3. Check file size (<5MB)")
+# [Rest of your code remains exactly the same...]
+# Continue with LearningStyleQuiz, ProfileManager, TeachingAssistant classes
+# and the create_interface() function exactly as you had them
 # ========== LEARNING STYLE QUIZ ==========
 class LearningStyleQuiz:
     def __init__(self):