Spaces:

Dannyar608
/

Final_project

Runtime error

App Files Files Community

Dannyar608 committed on May 27

Commit

ba8e4ab

verified ·

1 Parent(s): 0869b6b

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -137

app.py CHANGED Viewed

@@ -155,7 +155,19 @@ class LearningStyleQuiz:
             "When learning a new skill, I prefer to:",
             "When studying, I like to:",
             "I prefer teachers who:",
-            "When solving problems, I:"
         ]
         self.options = [
@@ -166,7 +178,19 @@ class LearningStyleQuiz:
             ["Watch demonstrations", "Listen to instructions", "Read instructions", "Jump in and try it"],
             ["Use highlighters and diagrams", "Discuss with others", "Read and take notes", "Move around or use objects"],
             ["Use visual aids", "Give interesting lectures", "Provide reading materials", "Include hands-on activities"],
-            ["Draw pictures or diagrams", "Talk through options", "Make lists", "Try different solutions physically"]
         ]
         self.learning_styles = {
@@ -250,14 +274,14 @@ class LearningStyleQuiz:
 # Initialize learning style quiz
 learning_style_quiz = LearningStyleQuiz()
-class EnhancedMiamiDadeTranscriptParser:
     def __init__(self):
         self.patterns = {
             'student_info': re.compile(
-                r"LEGAL NAME:\s*([A-Z]+,\s*[A-Z]+).*?"
                 r"GRADE LEVEL:\s*(\d+).*?"
                 r"FL STUDENT ID:\s*(\w+).*?"
-                r"CURRENT SCHOOL:\s*(\d+\s+[\w\s]+?)\s*\(",
                 re.DOTALL
             ),
             'gpa': re.compile(
@@ -269,7 +293,7 @@ class EnhancedMiamiDadeTranscriptParser:
                 re.DOTALL
             ),
             'course': re.compile(
-                r"(\d)\s+(\w+)\s+([\w\s]+?)\s+([A-Z]{2})\s+([A-Z])\s+([A-Z])\s+([A-Z])\s+([\d.]+)\s+([\d.]+)",
                 re.DOTALL
             ),
             'assessment': re.compile(
@@ -282,92 +306,58 @@ class EnhancedMiamiDadeTranscriptParser:
             'class_rank': re.compile(
                 r"\*\s+PERCENTILE:\s*(\d+)\s*\*\s*TOTAL NUMBER IN CLASS:\s*(\d+)",
                 re.DOTALL
             )
         }
     def parse_transcript(self, file_path: str) -> Dict:
-        """Parse Miami-Dade transcript PDF with enhanced pattern matching"""
         try:
             # First try pdfplumber
             with pdfplumber.open(file_path) as pdf:
-                text = "\n".join(page.extract_text() for page in pdf.pages)
             # Fallback to PyMuPDF if text extraction is poor
-            if len(text) < 500:  # If we got very little text
                 doc = fitz.open(file_path)
                 text = ""
                 for page in doc:
                     text += page.get_text()
-            # Debug: Save extracted text
-            with open("debug_transcript.txt", "w") as f:
-                f.write(text)
-            return self._parse_format(text)
         except Exception as e:
             logger.error(f"Error parsing transcript: {str(e)}")
             raise ValueError(f"Error processing transcript: {str(e)}")
-    def _parse_format(self, text: str) -> Dict:
-        """Parse the transcript format with improved error handling"""
-        try:
-            parsed_data = {
-                'student_info': self._parse_student_info(text),
-                'academic_summary': self._parse_academic_summary(text),
-                'course_history': self._parse_courses(text),
-                'assessments': self._parse_assessments(text),
-                'format': 'miami_dade_v2'
-            }
-            # Validate we got at least some data
-            if not parsed_data['student_info'] or not parsed_data['course_history']:
-                raise ValueError("Incomplete data extracted from transcript")
-            return parsed_data
-        except Exception as e:
-            logger.error(f"Format parsing error: {str(e)}")
-            return self._parse_alternative_format(text)
-    def _parse_alternative_format(self, text: str) -> Dict:
-        """Fallback parser for alternative formats"""
-        try:
-            parsed_data = {
-                'student_info': {},
-                'academic_summary': {},
-                'course_history': [],
-                'assessments': {},
-                'format': 'alternative'
-            }
-            # Try to extract basic student info
-            name_match = re.search(r"NAME:\s*([A-Z]+,\s*[A-Z]+)", text)
-            if name_match:
-                parsed_data['student_info']['name'] = name_match.group(1).replace(',', ' ').strip()
-            # Try to extract GPA
-            gpa_match = re.search(r"GPA:\s*([\d.]+)", text)
-            if gpa_match:
-                parsed_data['academic_summary']['gpa'] = {
-                    'district': float(gpa_match.group(1)),
-                    'state': float(gpa_match.group(1))  # Assume same if not specified
-                }
-            return parsed_data
-        except Exception as e:
-            logger.error(f"Alternative parser failed: {str(e)}")
-            raise ValueError("Could not parse transcript in any supported format")
     def _parse_student_info(self, text: str) -> Dict:
         """Extract student information with improved pattern matching"""
         match = self.patterns['student_info'].search(text)
         if not match:
-            # Try alternative patterns
-            match = re.search(r"STUDENT INFORMATION.*?NAME:\s*([^\n]+)", text, re.DOTALL)
-            if not match:
-                return {}
         return {
-            'name': match.group(1).replace(',', ' ').strip() if match else "Unknown",
             'grade': match.group(2) if match and len(match.groups()) > 1 else "Unknown",
             'student_id': match.group(3) if match and len(match.groups()) > 2 else "Unknown",
             'school': match.group(4).strip() if match and len(match.groups()) > 3 else "Unknown",
@@ -376,55 +366,35 @@ class EnhancedMiamiDadeTranscriptParser:
         }
     def _extract_birth_date(self, text: str) -> Optional[str]:
-        """Extract birth date from transcript with multiple pattern attempts"""
-        patterns = [
-            r"BIRTH DATE:\s*(\d{2}/\d{2}/\d{4})",
-            r"DOB:\s*(\d{2}/\d{2}/\d{4})",
-            r"DATE OF BIRTH:\s*([^\n]+)"
-        ]
-        for pattern in patterns:
-            birth_match = re.search(pattern, text)
-            if birth_match:
-                return birth_match.group(1)
         return None
     def _extract_ethnicity(self, text: str) -> Optional[str]:
-        """Extract ethnicity information with multiple pattern attempts"""
-        patterns = [
-            r"ETHNICITY:\s*([^\n]+)",
-            r"RACE/ETHNICITY:\s*([^\n]+)",
-            r"DEMOGRAPHICS.*?ETHNICITY:\s*([^\n]+)"
-        ]
-        for pattern in patterns:
-            eth_match = re.search(pattern, text, re.DOTALL)
-            if eth_match:
-                return eth_match.group(1).strip()
         return None
     def _parse_academic_summary(self, text: str) -> Dict:
-        """Parse academic summary section with improved error handling"""
         summary = {
             'gpa': {'district': None, 'state': None},
             'credits': {},
             'class_rank': {'percentile': None, 'class_size': None}
         }
-        # Try multiple GPA patterns
         gpa_match = self.patterns['gpa'].search(text)
-        if not gpa_match:
-            gpa_match = re.search(r"GPA.*?([\d.]+).*?([\d.]+)", text)
         if gpa_match:
             summary['gpa']['district'] = float(gpa_match.group(1))
             summary['gpa']['state'] = float(gpa_match.group(2)) if gpa_match.group(2) else summary['gpa']['district']
-        # Try multiple credit patterns
         credits_matches = self.patterns['credits'].finditer(text)
-        if not credits_matches:
-            credits_matches = re.finditer(r"([A-Z ]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)", text)
         for match in credits_matches:
             subject = match.group(1).strip()
             summary['credits'][subject] = {
@@ -433,11 +403,8 @@ class EnhancedMiamiDadeTranscriptParser:
                 'remaining': float(match.group(4)) if match.group(4) else None
             }
-        # Try multiple class rank patterns
         rank_match = self.patterns['class_rank'].search(text)
-        if not rank_match:
-            rank_match = re.search(r"RANK.*?(\d+).*?(\d+)", text)
         if rank_match:
             summary['class_rank']['percentile'] = int(rank_match.group(1))
             summary['class_rank']['class_size'] = int(rank_match.group(2))
@@ -445,48 +412,40 @@ class EnhancedMiamiDadeTranscriptParser:
         return summary
     def _parse_courses(self, text: str) -> List[Dict]:
-        """Parse course history section with improved pattern matching"""
         courses = []
         # Try primary pattern first
         for match in self.patterns['course'].finditer(text):
-            courses.append(self._create_course_dict(match))
-        # If no courses found, try alternative patterns
         if not courses:
-            alt_pattern = re.compile(
-                r"(\d{4}-\d{4})\s+(\w+)\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([\d.]+)",
-                re.DOTALL
-            )
-            for match in alt_pattern.finditer(text):
                 courses.append({
                     'term': match.group(1),
                     'course_code': match.group(2),
                     'course_title': match.group(3).strip(),
                     'subject_area': match.group(4),
                     'grade': match.group(5),
-                    'credit_earned': float(match.group(6)),
-                    'credit_attempted': float(match.group(6))
                 })
         return courses
-    def _create_course_dict(self, match) -> Dict:
-        """Create standardized course dictionary from regex match"""
-        return {
-            'term': match.group(1),
-            'course_code': match.group(2),
-            'course_title': match.group(3).strip(),
-            'subject_area': match.group(4),
-            'grade': match.group(5),
-            'flag': match.group(6),
-            'credit_status': match.group(7),
-            'credit_attempted': float(match.group(8)),
-            'credit_earned': float(match.group(9))
-        }
     def _parse_assessments(self, text: str) -> Dict:
-        """Parse assessment and requirement information with improved patterns"""
         assessments = {
             'ela_passed_date': None,
             'algebra_passed': False,
@@ -497,11 +456,7 @@ class EnhancedMiamiDadeTranscriptParser:
             }
         }
-        # Try multiple assessment patterns
         matches = self.patterns['assessment'].finditer(text)
-        if not matches:
-            matches = re.finditer(r"(ENGLISH|ALGEBRA|BIOLOGY|SERVICE).*?(PASSED|MET|YES|NO|\d{2}/\d{4})", text)
         for match in matches:
             if match.group(1):  # ELA date
                 assessments['ela_passed_date'] = match.group(1)
@@ -517,8 +472,8 @@ class EnhancedMiamiDadeTranscriptParser:
         return assessments
-# Initialize the enhanced parser
-transcript_parser = EnhancedMiamiDadeTranscriptParser()
 class AcademicAnalyzer:
     def __init__(self):
@@ -547,7 +502,7 @@ class AcademicAnalyzer:
             if parsed_data.get('format') == 'progress_summary':
                 weighted_gpa = float(parsed_data.get('student_info', {}).get('weighted_gpa', 0))
                 unweighted_gpa = float(parsed_data.get('student_info', {}).get('unweighted_gpa', 0))
-            elif parsed_data.get('format') == 'miami_dade_v2':
                 weighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', {}).get('district', 0))
                 unweighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', {}).get('state', 0))
             else:  # Alternative format
@@ -681,7 +636,7 @@ class AcademicAnalyzer:
                         'remaining': max(0, info.get('required', 0) - info.get('earned', 0))
                     }
                     for subject, info in credits.items()
-                    if info and info.get('required', 0) > info.get('earned', 0)
                 ]
             current_grade = parsed_data.get('student_info', {}).get('grade', '')
@@ -2584,4 +2539,5 @@ def create_enhanced_interface():
 app = create_enhanced_interface()
 if __name__ == "__main__":
-    app.launch(server_name="0.0.0.0", server_port=7860)

             "When learning a new skill, I prefer to:",
             "When studying, I like to:",
             "I prefer teachers who:",
+            "When solving problems, I:",
+            "When working on a group project, I:",
+            "My ideal study environment is:",
+            "When preparing for a test, I:",
+            "When reading instructions, I:",
+            "When explaining something to someone, I:",
+            "When taking notes in class, I:",
+            "When using a new device or app, I:",
+            "When remembering names, I:",
+            "When choosing a book to read, I:",
+            "When giving a presentation, I:",
+            "When organizing my work, I:",
+            "When relaxing, I enjoy:"
         ]
         self.options = [
             ["Watch demonstrations", "Listen to instructions", "Read instructions", "Jump in and try it"],
             ["Use highlighters and diagrams", "Discuss with others", "Read and take notes", "Move around or use objects"],
             ["Use visual aids", "Give interesting lectures", "Provide reading materials", "Include hands-on activities"],
+            ["Draw pictures or diagrams", "Talk through options", "Make lists", "Try different solutions physically"],
+            ["Create visual plans", "Discuss ideas verbally", "Write detailed plans", "Take on hands-on tasks"],
+            ["Somewhere quiet with good lighting", "Somewhere I can discuss ideas", "A library with lots of resources", "Somewhere I can move around"],
+            ["Create visual study aids", "Recite information aloud", "Write summaries", "Create physical models"],
+            ["Look at diagrams first", "Have someone explain them", "Read them carefully", "Try to follow them as I go"],
+            ["Draw diagrams or pictures", "Explain verbally", "Write detailed explanations", "Show by doing"],
+            ["Draw diagrams and symbols", "Record lectures to listen later", "Write detailed notes", "Underline and highlight"],
+            ["Look at the screen layout", "Listen to audio instructions", "Read the manual", "Start clicking buttons"],
+            ["Remember faces better than names", "Remember names when I hear them", "Remember names when I see them written", "Remember people by activities we did"],
+            ["Choose books with pictures/diagrams", "Choose audiobooks", "Choose text-heavy books", "Choose interactive books"],
+            ["Use lots of visual aids", "Focus on my verbal delivery", "Provide handouts", "Use props or demonstrations"],
+            ["Use color-coding systems", "Talk through my plan", "Make detailed lists", "Physically arrange materials"],
+            ["Watching videos or art", "Listening to music/podcasts", "Reading", "Doing physical activities"]
         ]
         self.learning_styles = {
 # Initialize learning style quiz
 learning_style_quiz = LearningStyleQuiz()
+class MiamiDadeTranscriptParser:
     def __init__(self):
         self.patterns = {
             'student_info': re.compile(
+                r"LEGAL NAME:\s*([^\n]+?)\s*MAILING\s+ADDRESS:.*?"
                 r"GRADE LEVEL:\s*(\d+).*?"
                 r"FL STUDENT ID:\s*(\w+).*?"
+                r"CURRENT SCHOOL:\s*(\d+\s+[^\n]+?)\s*\(",
                 re.DOTALL
             ),
             'gpa': re.compile(
                 re.DOTALL
             ),
             'course': re.compile(
+                r"(\d)\s+(\w+)\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([A-Z])\s+([A-Z])\s+([\d.]+)\s+([\d.]+)",
                 re.DOTALL
             ),
             'assessment': re.compile(
             'class_rank': re.compile(
                 r"\*\s+PERCENTILE:\s*(\d+)\s*\*\s*TOTAL NUMBER IN CLASS:\s*(\d+)",
                 re.DOTALL
+            ),
+            'course_alt': re.compile(
+                r"(\d)\s+(\w+)\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([A-Z])\s+([\d.]+)\s+([\d.]+)",
+                re.DOTALL
             )
         }
     def parse_transcript(self, file_path: str) -> Dict:
         """Extract the text of a transcript PDF and parse it.

         pdfplumber is tried first. When it yields fewer than 500 characters
         the file is re-read with PyMuPDF (``fitz``) and that text is used
         instead.

         Args:
             file_path: Path of the PDF to read.

         Returns:
             The dict built by ``_parse_miami_dade_format``.

         Raises:
             ValueError: For any failure while reading or parsing; the
                 original exception is attached as ``__cause__``.
         """
         try:
             # First try pdfplumber. extract_text() may return None for a page
             # without a text layer on some pdfplumber versions; treat that as
             # empty so the short-text fallback below still gets a chance to
             # run instead of failing on None + str.
             with pdfplumber.open(file_path) as pdf:
                 text = "".join(
                     (page.extract_text() or "") + "\n" for page in pdf.pages
                 )
             # Fallback to PyMuPDF if text extraction is poor
             if len(text) < 500:
                 doc = fitz.open(file_path)
                 try:
                     text = "".join(page.get_text() for page in doc)
                 finally:
                     # Release the file handle even if a page fails to render.
                     doc.close()
             return self._parse_miami_dade_format(text)
         except Exception as e:
             logger.error(f"Error parsing transcript: {str(e)}")
             # Chain the cause so the real traceback is not lost.
             raise ValueError(f"Error processing transcript: {str(e)}") from e
+    def _parse_miami_dade_format(self, text: str) -> Dict:
+        """Parse the specific Miami-Dade transcript format"""
         # Every section parser below receives the same full text. The 'format'
         # tag set here is the value AcademicAnalyzer compares against when it
         # chooses which GPA fields to read.
+        parsed_data = {
+            'student_info': self._parse_student_info(text),
+            'academic_summary': self._parse_academic_summary(text),
+            'course_history': self._parse_courses(text),
+            'assessments': self._parse_assessments(text),
+            'format': 'miami_dade_v3'
+        }
+        # Validate we got at least some data
         # An empty student_info dict or an empty course list raises ValueError;
         # academic_summary and assessments are not checked.
+        if not parsed_data['student_info'] or not parsed_data['course_history']:
+            raise ValueError("Incomplete data extracted from transcript")
+        return parsed_data
     def _parse_student_info(self, text: str) -> Dict:
         """Extract student information with improved pattern matching"""
         match = self.patterns['student_info'].search(text)
         if not match:
+            return {}
         return {
+            'name': match.group(1).strip(),
             'grade': match.group(2) if match and len(match.groups()) > 1 else "Unknown",
             'student_id': match.group(3) if match and len(match.groups()) > 2 else "Unknown",
             'school': match.group(4).strip() if match and len(match.groups()) > 3 else "Unknown",
         }
     def _extract_birth_date(self, text: str) -> Optional[str]:
+        """Extract birth date from transcript"""
         # Only a two-digit/two-digit/four-digit date following the literal
         # "BIRTH DATE:" label is recognized; the first match is returned as
         # the captured string, without any date validation or conversion.
+        birth_match = re.search(r"BIRTH DATE:\s*(\d{2}/\d{2}/\d{4})", text)
+        if birth_match:
+            return birth_match.group(1)
         # No label/date pair was found in the text.
         return None
     def _extract_ethnicity(self, text: str) -> Optional[str]:
+        """Extract ethnicity information"""
         # Captures everything after the literal "ETHNICITY:" label up to the
         # next newline (first occurrence only) and strips surrounding whitespace.
+        eth_match = re.search(r"ETHNICITY:\s*([^\n]+)", text)
+        if eth_match:
+            return eth_match.group(1).strip()
         # The label does not appear in the text.
         return None
     def _parse_academic_summary(self, text: str) -> Dict:
+        """Parse academic summary section"""
         summary = {
             'gpa': {'district': None, 'state': None},
             'credits': {},
             'class_rank': {'percentile': None, 'class_size': None}
         }
+        # GPA
         gpa_match = self.patterns['gpa'].search(text)
         if gpa_match:
             summary['gpa']['district'] = float(gpa_match.group(1))
             summary['gpa']['state'] = float(gpa_match.group(2)) if gpa_match.group(2) else summary['gpa']['district']
+        # Credits
         credits_matches = self.patterns['credits'].finditer(text)
         for match in credits_matches:
             subject = match.group(1).strip()
             summary['credits'][subject] = {
                 'remaining': float(match.group(4)) if match.group(4) else None
             }
+        # Class Rank
         rank_match = self.patterns['class_rank'].search(text)
         if rank_match:
             summary['class_rank']['percentile'] = int(rank_match.group(1))
             summary['class_rank']['class_size'] = int(rank_match.group(2))
         return summary
     def _parse_courses(self, text: str) -> List[Dict]:
+        """Parse course history section"""
         courses = []
         # Try primary pattern first
         for match in self.patterns['course'].finditer(text):
+            courses.append({
+                'term': match.group(1),
+                'course_code': match.group(2),
+                'course_title': match.group(3).strip(),
+                'subject_area': match.group(4),
+                'grade': match.group(5),
+                'flag': match.group(6),
+                'credit_status': match.group(7),
+                'credit_attempted': float(match.group(8)),
+                'credit_earned': float(match.group(9))
+            })
+        # If no courses found, try alternative pattern
         if not courses:
+            for match in self.patterns['course_alt'].finditer(text):
                 courses.append({
                     'term': match.group(1),
                     'course_code': match.group(2),
                     'course_title': match.group(3).strip(),
                     'subject_area': match.group(4),
                     'grade': match.group(5),
+                    'credit_attempted': float(match.group(6)),
+                    'credit_earned': float(match.group(7))
                 })
         return courses
     def _parse_assessments(self, text: str) -> Dict:
+        """Parse assessment and requirement information"""
         assessments = {
             'ela_passed_date': None,
             'algebra_passed': False,
             }
         }
         matches = self.patterns['assessment'].finditer(text)
         for match in matches:
             if match.group(1):  # ELA date
                 assessments['ela_passed_date'] = match.group(1)
         return assessments
+# Initialize the parser
+transcript_parser = MiamiDadeTranscriptParser()
 class AcademicAnalyzer:
     def __init__(self):
             if parsed_data.get('format') == 'progress_summary':
                 weighted_gpa = float(parsed_data.get('student_info', {}).get('weighted_gpa', 0))
                 unweighted_gpa = float(parsed_data.get('student_info', {}).get('unweighted_gpa', 0))
+            elif parsed_data.get('format') == 'miami_dade_v3':
                 weighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', {}).get('district', 0))
                 unweighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', {}).get('state', 0))
             else:  # Alternative format
                         'remaining': max(0, info.get('required', 0) - info.get('earned', 0))
                     }
                     for subject, info in credits.items()
+                    if info and info.get('required', 0) > info.get('earned', 0))
                 ]
             current_grade = parsed_data.get('student_info', {}).get('grade', '')
 app = create_enhanced_interface()
 if __name__ == "__main__":
+    app.launch(server_name="0.0.0.0", server_port=7860)